diff options
-rw-r--r-- | doc/chan.n | 29 | ||||
-rw-r--r-- | generic/tclIO.c | 74 | ||||
-rw-r--r-- | tests/chanio.test | 6 | ||||
-rw-r--r-- | tests/io.test | 61 | ||||
-rw-r--r-- | tests/ioCmd.test | 3 | ||||
-rw-r--r-- | tests/zlib.test | 4 |
6 files changed, 95 insertions, 82 deletions
@@ -124,18 +124,8 @@ returned by \fBencoding names\fR, or from Unicode to the encoding. .RS .PP -\fBbinary\fR is an alias for \fBiso8859-1\fR: Each byte read from the -channel becomes the Unicode character having the same value as that byte, and -each character written to the channel becomes a single byte in the output, -allowing Tcl to work seamlessly with binary data as long as each "character" in -the data remains in the range of 0 to 255 so that there is no distinction between -binary data and text. For example, A JPEG image can be read from a -\fBbinary\fR channel, manipulated, and then written back to a \fBbinary\fR -channel. - -For working with binary data \fB\-translation binary\fR is usually used -instead, as it sets the encoding to \fBbinary\fR and also disables other -translations on the channel. +\fBbinary\fR is an alias for \fBiso8859-1\fR. This alone is not sufficient for +working with binary data. Use \fB\-translation binary\fR instead. .PP The encoding of a new channel is the value of \fBencoding system\fR, which returns the platform- and locale-dependent system encoding used to @@ -196,10 +186,17 @@ platforms it is \fBcrlf\fR for both input and output. .TP \fBbinary\fR . -Like \fBlf\fR, no end-of-line translation is performed, but in addition, -\fB\-eofchar\fR is set to the empty string to disable it, and \fB\-encoding\fR -is set to \fBbinary\fR. With this one setting, a channel is fully configured -for binary input and output. +Like \fBlf\fR, no end-of-line translation is performed, but in addition, sets +\fB\-eofchar\fR to the empty string to disable it, sets \fB\-encoding\fR to +\fBiso8859-1\fR, and sets \fB\-profile\fR to \fBstrict\fR so the channel is +fully configured for binary input and output: Each byte read from the channel +becomes the Unicode character having the same value as that byte, and each +character written to the channel becomes a single byte in the output. 
This +makes it possible to work seamlessly with binary data as long as each character +in the data remains in the range of 0 to 255 so that there is no distinction +between binary data and text. For example, a JPEG image can be read from +such a channel, manipulated, and then written back to such a channel. + .TP \fBcr\fR . diff --git a/generic/tclIO.c b/generic/tclIO.c index a45f39a..cea8119 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -1675,11 +1675,8 @@ Tcl_CreateChannel( * interpretation that Tcl_Channels give to the "-encoding binary" option. */ - statePtr->encoding = NULL; name = Tcl_GetEncodingName(NULL); - if (strcmp(name, "binary") != 0) { - statePtr->encoding = Tcl_GetEncoding(NULL, name); - } + statePtr->encoding = Tcl_GetEncoding(NULL, name); statePtr->inputEncodingState = NULL; statePtr->inputEncodingFlags = TCL_ENCODING_START; CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags, @@ -3480,7 +3477,8 @@ TclClose( stickyError = 0; - if (GotFlag(statePtr, TCL_WRITABLE) && (statePtr->encoding != NULL) + if (GotFlag(statePtr, TCL_WRITABLE) + && (statePtr->encoding != GetBinaryEncoding()) && !(statePtr->outputEncodingFlags & TCL_ENCODING_START)) { int code = CheckChannelErrors(statePtr, TCL_WRITABLE); @@ -4269,11 +4267,7 @@ Tcl_WriteObj( do { int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; int written; - if (statePtr->encoding == NULL) { - written = WriteBytes(chanPtr, src, chunkSize); - } else { - written = WriteChars(chanPtr, src, chunkSize); - } + written = WriteChars(chanPtr, src, chunkSize); if (written < 0) { return TCL_INDEX_NONE; } @@ -4651,7 +4645,7 @@ Tcl_GetsObj( * done on objPtr. 
*/ - if ((statePtr->encoding == NULL) + if (statePtr->encoding == GetBinaryEncoding() && ((statePtr->inputTranslation == TCL_TRANSLATE_LF) || (statePtr->inputTranslation == TCL_TRANSLATE_CR)) && Tcl_GetByteArrayFromObj(objPtr, (size_t *)NULL) != NULL) { @@ -4682,15 +4676,6 @@ Tcl_GetsObj( } /* - * If there is no encoding, use "iso8859-1" -- Tcl_GetsObj() doesn't - * produce ByteArray objects. - */ - - if (encoding == NULL) { - encoding = GetBinaryEncoding(); - } - - /* * Object used by FilterInputBytes to keep track of how much data has been * consumed from the channel buffers. */ @@ -5236,7 +5221,7 @@ TclGetsObjBinary( * XXX - unimplemented. */ - if (statePtr->encoding != NULL) { + if (statePtr->encoding != GetBinaryEncoding()) { } /* @@ -5951,7 +5936,7 @@ DoReadChars( #define UTF_EXPANSION_FACTOR 1024 int factor = UTF_EXPANSION_FACTOR; - binaryMode = (encoding == NULL) + binaryMode = (encoding == GetBinaryEncoding()) && (statePtr->inputTranslation == TCL_TRANSLATE_LF) && (statePtr->inEofChar == '\0'); @@ -6244,8 +6229,7 @@ ReadChars( * UTF-8. On output, contains another guess * based on the data seen so far. */ { - Tcl_Encoding encoding = statePtr->encoding? 
statePtr->encoding - : GetBinaryEncoding(); + Tcl_Encoding encoding = statePtr->encoding; Tcl_EncodingState savedState = statePtr->inputEncodingState; ChannelBuffer *bufPtr = statePtr->inQueueHead; int savedIEFlags = statePtr->inputEncodingFlags; @@ -7971,12 +7955,8 @@ Tcl_GetChannelOption( if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-encoding"); } - if (statePtr->encoding == NULL) { - Tcl_DStringAppendElement(dsPtr, "binary"); - } else { - Tcl_DStringAppendElement(dsPtr, - Tcl_GetEncodingName(statePtr->encoding)); - } + Tcl_DStringAppendElement(dsPtr, + Tcl_GetEncodingName(statePtr->encoding)); if (len > 0) { return TCL_OK; } @@ -8196,7 +8176,13 @@ Tcl_SetChannelOption( int profile; if ((newValue[0] == '\0') || (strcmp(newValue, "binary") == 0)) { - encoding = NULL; + encoding = Tcl_GetEncoding(NULL, "iso8859-1"); + CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags + ,CHANNEL_PROFILE_GET(statePtr->inputEncodingFlags) + |TCL_ENCODING_PROFILE_STRICT); + CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags + ,CHANNEL_PROFILE_GET(statePtr->outputEncodingFlags) + |TCL_ENCODING_PROFILE_STRICT); } else { encoding = Tcl_GetEncoding(interp, newValue); if (encoding == NULL) { @@ -8209,7 +8195,7 @@ Tcl_SetChannelOption( * iso2022, the terminated escape sequence must write to the buffer. 
*/ - if ((statePtr->encoding != NULL) + if ((statePtr->encoding != GetBinaryEncoding()) && !(statePtr->outputEncodingFlags & TCL_ENCODING_START) && (CheckChannelErrors(statePtr, TCL_WRITABLE) == 0)) { statePtr->outputEncodingFlags |= TCL_ENCODING_END; @@ -8304,7 +8290,13 @@ Tcl_SetChannelOption( translation = TCL_TRANSLATE_LF; statePtr->inEofChar = 0; Tcl_FreeEncoding(statePtr->encoding); - statePtr->encoding = NULL; + statePtr->encoding = Tcl_GetEncoding(NULL, "iso8859-1"); + CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags + ,CHANNEL_PROFILE_GET(statePtr->inputEncodingFlags) + |TCL_ENCODING_PROFILE_STRICT); + CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags + ,CHANNEL_PROFILE_GET(statePtr->outputEncodingFlags) + |TCL_ENCODING_PROFILE_STRICT); } else if (strcmp(readMode, "lf") == 0) { translation = TCL_TRANSLATE_LF; } else if (strcmp(readMode, "cr") == 0) { @@ -8353,7 +8345,13 @@ Tcl_SetChannelOption( } else if (strcmp(writeMode, "binary") == 0) { statePtr->outputTranslation = TCL_TRANSLATE_LF; Tcl_FreeEncoding(statePtr->encoding); - statePtr->encoding = NULL; + statePtr->encoding = Tcl_GetEncoding(NULL, "iso8859-1"); + CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags + ,CHANNEL_PROFILE_GET(statePtr->inputEncodingFlags) + |TCL_ENCODING_PROFILE_STRICT); + CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags + ,CHANNEL_PROFILE_GET(statePtr->outputEncodingFlags) + |TCL_ENCODING_PROFILE_STRICT); } else if (strcmp(writeMode, "lf") == 0) { statePtr->outputTranslation = TCL_TRANSLATE_LF; } else if (strcmp(writeMode, "cr") == 0) { @@ -10271,13 +10269,9 @@ Lossless( && outStatePtr->outputTranslation == TCL_TRANSLATE_LF && ( ( - (inStatePtr->encoding == NULL - || inStatePtr->encoding == GetBinaryEncoding() - ) + inStatePtr->encoding == GetBinaryEncoding() && - (outStatePtr->encoding == NULL - || outStatePtr->encoding == GetBinaryEncoding() - ) + outStatePtr->encoding == GetBinaryEncoding() ) || ( diff --git a/tests/chanio.test b/tests/chanio.test index c3caa1c..680039c 100644 
--- a/tests/chanio.test +++ b/tests/chanio.test @@ -6868,8 +6868,7 @@ test chan-io-52.9 {TclCopyChannel & encodings} {fcopy} { [file size $path(utf8-fcopy.txt)] \ [file size $path(utf8-rp.txt)] } {3 5 5} -test chan-io-52.10 {TclCopyChannel & encodings} {fcopy notWinCI} { - # encoding to binary (=> implies that the internal utf-8 is written) +test chan-io-52.10 {TclCopyChannel & encodings} -constraints {fcopy} -body { set in [open $path(kyrillic.txt) r] set out [open $path(utf8-fcopy.txt) w] chan configure $in -encoding koi8-r -translation lf @@ -6879,7 +6878,8 @@ test chan-io-52.10 {TclCopyChannel & encodings} {fcopy notWinCI} { chan close $in chan close $out file size $path(utf8-fcopy.txt) -} 5 +} -returnCodes 1 -match glob -result {error writing "*":\ + invalid or incomplete multibyte or wide character} test chan-io-52.11 {TclCopyChannel & encodings} -setup { set f [open $path(utf8-fcopy.txt) w] fconfigure $f -encoding utf-8 -translation lf diff --git a/tests/io.test b/tests/io.test index 5fd255c..444b3de 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7500,10 +7500,7 @@ test io-52.9 {TclCopyChannel & encodings} {fcopy} { [file size $path(utf8-fcopy.txt)] \ [file size $path(utf8-rp.txt)] } {3 5 5} -test io-52.10 {TclCopyChannel & encodings} {fcopy notWinCI} { - # encoding to binary (=> implies that the - # internal utf-8 is written) - +test io-52.10 {TclCopyChannel & encodings} -constraints fcopy -body { set in [open $path(kyrillic.txt) r] set out [open $path(utf8-fcopy.txt) w] @@ -7516,7 +7513,8 @@ test io-52.10 {TclCopyChannel & encodings} {fcopy notWinCI} { close $out file size $path(utf8-fcopy.txt) -} 5 +} -returnCodes 1 -match glob -result {error writing "*":\ + invalid or incomplete multibyte or wide character} test io-52.11 {TclCopyChannel & encodings} -setup { set out [open $path(utf8-fcopy.txt) w] fconfigure $out -encoding utf-8 -translation lf -profile strict @@ -8374,7 +8372,7 @@ test io-53.13 {TclCopyChannel: read error reporting} -setup { catch 
{close $out} removeFile out rename driver {} -} -result {error reading "*": *} -returnCodes error -match glob +} -result {error reading "rc*": *} -returnCodes error -match glob test io-53.14 {TclCopyChannel: write error reporting} -setup { proc driver {cmd args} { variable buffer @@ -9264,7 +9262,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup { +test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.6] set f [open $fn w+] fconfigure $f -encoding binary @@ -9278,7 +9276,8 @@ test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -set } -cleanup { close $f removeFile io-75.6 -} -match glob -returnCodes 1 -result {error reading "*": invalid or incomplete multibyte or wide character} +} -match glob -returnCodes 1 -result {error reading "file*":\ + invalid or incomplete multibyte or wide character} test io-75.7 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.7] @@ -9294,7 +9293,8 @@ test io-75.7 {invalid utf-8 encoding gets is not ignored (-profile strict)} -set } -cleanup { close $f removeFile io-75.7 -} -match glob -returnCodes 1 -result {error reading "*": invalid or incomplete multibyte or wide character} +} -match glob -returnCodes 1 -result {error reading "file*":\ + invalid or incomplete multibyte or wide character} test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] @@ -9330,10 +9330,11 @@ test io-75.9 {unrepresentable character write passes and is replaced by ?} -setu removeFile io-75.9 } -match glob -result [list {A} {error writing "*": invalid or incomplete multibyte or wide character}] -# Incomplete sequence test. -# This error may IMHO only be detected with the close. -# But the read already returns the incomplete sequence. 
-test io-75.10 {incomplete multibyte encoding read is ignored} -setup { +test io-75.10 { + incomplete multibyte encoding read is not ignored because "binary" sets + profile to strict +} -setup { + set res {} set fn [makeFile {} io-75.10] set f [open $fn w+] fconfigure $f -encoding binary @@ -9342,13 +9343,21 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { seek $f 0 fconfigure $f -encoding utf-8 -buffering none } -body { + catch {read $f} errmsg + lappend res $errmsg + seek $f 0 + chan configure $f -profile tcl8 set d [read $f] binary scan $d H* hd - set hd + lappend res $hd + return $res } -cleanup { close $f removeFile io-75.10 -} -result 41c0 + unset result +} -match glob -result {{error reading "file*":\ + invalid or incomplete multibyte or wide character} 41c0} + # The current result returns the orphan byte as byte. # This may be expected due to special utf-8 handling. @@ -9372,9 +9381,14 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { } -cleanup { close $f removeFile io-75.11 -} -match glob -result {41 1 {error reading "*": invalid or incomplete multibyte or wide character}} +} -match glob -result {41 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character}} -test io-75.12 {invalid utf-8 encoding read is ignored} -setup { +test io-75.12 { + invalid utf-8 encoding read is not ignored because setting the encoding to + "binary" also set the profile to strict +} -setup { + set res {} set fn [makeFile {} io-75.12] set f [open $fn w+] fconfigure $f -encoding binary @@ -9383,13 +9397,20 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf } -body { + catch {read $f} errmsg + lappend res $errmsg + chan configure $f -profile tcl8 + seek $f 0 set d [read $f] binary scan $d H* hd - set hd + lappend res $hd + return $res } -cleanup { close $f removeFile io-75.12 -} -result 4181 + unset res +} 
-match glob -result {{error reading "file*":\ + invalid or incomplete multibyte or wide character} 4181} test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.13] set f [open $fn w+] @@ -9407,7 +9428,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -se } -cleanup { close $f removeFile io-75.13 -} -match glob -result {41 1 {error reading "*": invalid or incomplete multibyte or wide character}} +} -match glob -result {41 1 {error reading "file*": invalid or incomplete multibyte or wide character}} # ### ### ### ######### ######### ######### diff --git a/tests/ioCmd.test b/tests/ioCmd.test index cab4745..2df2ca0 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -266,7 +266,7 @@ test iocmd-8.9 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -eofchar {} -profile tcl8 -translation lf} +} -result {-blocking 1 -buffering none -buffersize 4040 -encoding iso8859-1 -eofchar {} -profile tcl8 -translation lf} test iocmd-8.10 {fconfigure command} -returnCodes error -body { fconfigure a b } -result {can not find channel named "a"} @@ -496,6 +496,7 @@ test iocmd-12.10 {POSIX open access modes: BINARY} { set result } 5 test iocmd-12.11 {POSIX open access modes: BINARY} -body { + after 100 set f [open $path(test1) {WRONLY BINARY TRUNC}] puts $f Ɉ ;# throws an exception } -cleanup { diff --git a/tests/zlib.test b/tests/zlib.test index 720fdd6..93c568b 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf -checksum 1 -dictionary {}} {-blocking 1 
-buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf}} test zlib-8.7 {transformation and fconfigure} -setup { set file [makeFile {} test.gz] set fd [open $file wb] @@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf}} # Input is headers from fetching SPDY draft # Dictionary is that which is proposed _in_ SPDY draft set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n" |