summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/chan.n29
-rw-r--r--generic/tclIO.c74
-rw-r--r--tests/chanio.test6
-rw-r--r--tests/io.test61
-rw-r--r--tests/ioCmd.test3
-rw-r--r--tests/zlib.test4
6 files changed, 95 insertions, 82 deletions
diff --git a/doc/chan.n b/doc/chan.n
index 14fa941..62121d1 100644
--- a/doc/chan.n
+++ b/doc/chan.n
@@ -124,18 +124,8 @@ returned by \fBencoding names\fR, or
from Unicode to the encoding.
.RS
.PP
-\fBbinary\fR is an alias for \fBiso8859-1\fR: Each byte read from the
-channel becomes the Unicode character having the same value as that byte, and
-each character written to the channel becomes a single byte in the output,
-allowing Tcl to work seamlessly with binary data as long as each "character" in
-the data remains in the range of 0 to 255 so that there is no distinction between
-binary data and text. For example, A JPEG image can be read from a
-\fBbinary\fR channel, manipulated, and then written back to a \fBbinary\fR
-channel.
-
-For working with binary data \fB\-translation binary\fR is usually used
-instead, as it sets the encoding to \fBbinary\fR and also disables other
-translations on the channel.
+\fBbinary\fR is an alias for \fBiso8859-1\fR. This alone is not sufficient for
+working with binary data. Use \fB\-translation binary\fR instead.
.PP
The encoding of a new channel is the value of \fBencoding system\fR,
which returns the platform- and locale-dependent system encoding used to
@@ -196,10 +186,17 @@ platforms it is \fBcrlf\fR for both input and output.
.TP
\fBbinary\fR
.
-Like \fBlf\fR, no end-of-line translation is performed, but in addition,
-\fB\-eofchar\fR is set to the empty string to disable it, and \fB\-encoding\fR
-is set to \fBbinary\fR. With this one setting, a channel is fully configured
-for binary input and output.
+Like \fBlf\fR, no end-of-line translation is performed, but in addition, sets
+\fB\-eofchar\fR to the empty string to disable it, sets \fB\-encoding\fR to
+\fBiso8859-1\fR, and sets \fB\-profile\fR to \fBstrict\fR so that the channel is
+fully configured for binary input and output: Each byte read from the channel
+becomes the Unicode character having the same value as that byte, and each
+character written to the channel becomes a single byte in the output. This
+makes it possible to work seamlessly with binary data as long as each character
+in the data remains in the range of 0 to 255 so that there is no distinction
+between binary data and text. For example, a JPEG image can be read from
+such a channel, manipulated, and then written back to such a channel.
+
.TP
\fBcr\fR
.
diff --git a/generic/tclIO.c b/generic/tclIO.c
index a45f39a..cea8119 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -1675,11 +1675,8 @@ Tcl_CreateChannel(
* interpretation that Tcl_Channels give to the "-encoding binary" option.
*/
- statePtr->encoding = NULL;
name = Tcl_GetEncodingName(NULL);
- if (strcmp(name, "binary") != 0) {
- statePtr->encoding = Tcl_GetEncoding(NULL, name);
- }
+ statePtr->encoding = Tcl_GetEncoding(NULL, name);
statePtr->inputEncodingState = NULL;
statePtr->inputEncodingFlags = TCL_ENCODING_START;
CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags,
@@ -3480,7 +3477,8 @@ TclClose(
stickyError = 0;
- if (GotFlag(statePtr, TCL_WRITABLE) && (statePtr->encoding != NULL)
+ if (GotFlag(statePtr, TCL_WRITABLE)
+ && (statePtr->encoding != GetBinaryEncoding())
&& !(statePtr->outputEncodingFlags & TCL_ENCODING_START)) {
int code = CheckChannelErrors(statePtr, TCL_WRITABLE);
@@ -4269,11 +4267,7 @@ Tcl_WriteObj(
do {
int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen;
int written;
- if (statePtr->encoding == NULL) {
- written = WriteBytes(chanPtr, src, chunkSize);
- } else {
- written = WriteChars(chanPtr, src, chunkSize);
- }
+ written = WriteChars(chanPtr, src, chunkSize);
if (written < 0) {
return TCL_INDEX_NONE;
}
@@ -4651,7 +4645,7 @@ Tcl_GetsObj(
* done on objPtr.
*/
- if ((statePtr->encoding == NULL)
+ if (statePtr->encoding == GetBinaryEncoding()
&& ((statePtr->inputTranslation == TCL_TRANSLATE_LF)
|| (statePtr->inputTranslation == TCL_TRANSLATE_CR))
&& Tcl_GetByteArrayFromObj(objPtr, (size_t *)NULL) != NULL) {
@@ -4682,15 +4676,6 @@ Tcl_GetsObj(
}
/*
- * If there is no encoding, use "iso8859-1" -- Tcl_GetsObj() doesn't
- * produce ByteArray objects.
- */
-
- if (encoding == NULL) {
- encoding = GetBinaryEncoding();
- }
-
- /*
* Object used by FilterInputBytes to keep track of how much data has been
* consumed from the channel buffers.
*/
@@ -5236,7 +5221,7 @@ TclGetsObjBinary(
* XXX - unimplemented.
*/
- if (statePtr->encoding != NULL) {
+ if (statePtr->encoding != GetBinaryEncoding()) {
}
/*
@@ -5951,7 +5936,7 @@ DoReadChars(
#define UTF_EXPANSION_FACTOR 1024
int factor = UTF_EXPANSION_FACTOR;
- binaryMode = (encoding == NULL)
+ binaryMode = (encoding == GetBinaryEncoding())
&& (statePtr->inputTranslation == TCL_TRANSLATE_LF)
&& (statePtr->inEofChar == '\0');
@@ -6244,8 +6229,7 @@ ReadChars(
* UTF-8. On output, contains another guess
* based on the data seen so far. */
{
- Tcl_Encoding encoding = statePtr->encoding? statePtr->encoding
- : GetBinaryEncoding();
+ Tcl_Encoding encoding = statePtr->encoding;
Tcl_EncodingState savedState = statePtr->inputEncodingState;
ChannelBuffer *bufPtr = statePtr->inQueueHead;
int savedIEFlags = statePtr->inputEncodingFlags;
@@ -7971,12 +7955,8 @@ Tcl_GetChannelOption(
if (len == 0) {
Tcl_DStringAppendElement(dsPtr, "-encoding");
}
- if (statePtr->encoding == NULL) {
- Tcl_DStringAppendElement(dsPtr, "binary");
- } else {
- Tcl_DStringAppendElement(dsPtr,
- Tcl_GetEncodingName(statePtr->encoding));
- }
+ Tcl_DStringAppendElement(dsPtr,
+ Tcl_GetEncodingName(statePtr->encoding));
if (len > 0) {
return TCL_OK;
}
@@ -8196,7 +8176,13 @@ Tcl_SetChannelOption(
int profile;
if ((newValue[0] == '\0') || (strcmp(newValue, "binary") == 0)) {
- encoding = NULL;
+ encoding = Tcl_GetEncoding(NULL, "iso8859-1");
+ CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags
+ ,CHANNEL_PROFILE_GET(statePtr->inputEncodingFlags)
+ |TCL_ENCODING_PROFILE_STRICT);
+ CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags
+ ,CHANNEL_PROFILE_GET(statePtr->outputEncodingFlags)
+ |TCL_ENCODING_PROFILE_STRICT);
} else {
encoding = Tcl_GetEncoding(interp, newValue);
if (encoding == NULL) {
@@ -8209,7 +8195,7 @@ Tcl_SetChannelOption(
* iso2022, the terminated escape sequence must write to the buffer.
*/
- if ((statePtr->encoding != NULL)
+ if ((statePtr->encoding != GetBinaryEncoding())
&& !(statePtr->outputEncodingFlags & TCL_ENCODING_START)
&& (CheckChannelErrors(statePtr, TCL_WRITABLE) == 0)) {
statePtr->outputEncodingFlags |= TCL_ENCODING_END;
@@ -8304,7 +8290,13 @@ Tcl_SetChannelOption(
translation = TCL_TRANSLATE_LF;
statePtr->inEofChar = 0;
Tcl_FreeEncoding(statePtr->encoding);
- statePtr->encoding = NULL;
+ statePtr->encoding = Tcl_GetEncoding(NULL, "iso8859-1");
+ CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags
+ ,CHANNEL_PROFILE_GET(statePtr->inputEncodingFlags)
+ |TCL_ENCODING_PROFILE_STRICT);
+ CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags
+ ,CHANNEL_PROFILE_GET(statePtr->outputEncodingFlags)
+ |TCL_ENCODING_PROFILE_STRICT);
} else if (strcmp(readMode, "lf") == 0) {
translation = TCL_TRANSLATE_LF;
} else if (strcmp(readMode, "cr") == 0) {
@@ -8353,7 +8345,13 @@ Tcl_SetChannelOption(
} else if (strcmp(writeMode, "binary") == 0) {
statePtr->outputTranslation = TCL_TRANSLATE_LF;
Tcl_FreeEncoding(statePtr->encoding);
- statePtr->encoding = NULL;
+ statePtr->encoding = Tcl_GetEncoding(NULL, "iso8859-1");
+ CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags
+ ,CHANNEL_PROFILE_GET(statePtr->inputEncodingFlags)
+ |TCL_ENCODING_PROFILE_STRICT);
+ CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags
+ ,CHANNEL_PROFILE_GET(statePtr->outputEncodingFlags)
+ |TCL_ENCODING_PROFILE_STRICT);
} else if (strcmp(writeMode, "lf") == 0) {
statePtr->outputTranslation = TCL_TRANSLATE_LF;
} else if (strcmp(writeMode, "cr") == 0) {
@@ -10271,13 +10269,9 @@ Lossless(
&& outStatePtr->outputTranslation == TCL_TRANSLATE_LF
&& (
(
- (inStatePtr->encoding == NULL
- || inStatePtr->encoding == GetBinaryEncoding()
- )
+ inStatePtr->encoding == GetBinaryEncoding()
&&
- (outStatePtr->encoding == NULL
- || outStatePtr->encoding == GetBinaryEncoding()
- )
+ outStatePtr->encoding == GetBinaryEncoding()
)
||
(
diff --git a/tests/chanio.test b/tests/chanio.test
index c3caa1c..680039c 100644
--- a/tests/chanio.test
+++ b/tests/chanio.test
@@ -6868,8 +6868,7 @@ test chan-io-52.9 {TclCopyChannel & encodings} {fcopy} {
[file size $path(utf8-fcopy.txt)] \
[file size $path(utf8-rp.txt)]
} {3 5 5}
-test chan-io-52.10 {TclCopyChannel & encodings} {fcopy notWinCI} {
- # encoding to binary (=> implies that the internal utf-8 is written)
+test chan-io-52.10 {TclCopyChannel & encodings} -constraints {fcopy} -body {
set in [open $path(kyrillic.txt) r]
set out [open $path(utf8-fcopy.txt) w]
chan configure $in -encoding koi8-r -translation lf
@@ -6879,7 +6878,8 @@ test chan-io-52.10 {TclCopyChannel & encodings} {fcopy notWinCI} {
chan close $in
chan close $out
file size $path(utf8-fcopy.txt)
-} 5
+} -returnCodes 1 -match glob -result {error writing "*":\
+ invalid or incomplete multibyte or wide character}
test chan-io-52.11 {TclCopyChannel & encodings} -setup {
set f [open $path(utf8-fcopy.txt) w]
fconfigure $f -encoding utf-8 -translation lf
diff --git a/tests/io.test b/tests/io.test
index 5fd255c..444b3de 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -7500,10 +7500,7 @@ test io-52.9 {TclCopyChannel & encodings} {fcopy} {
[file size $path(utf8-fcopy.txt)] \
[file size $path(utf8-rp.txt)]
} {3 5 5}
-test io-52.10 {TclCopyChannel & encodings} {fcopy notWinCI} {
- # encoding to binary (=> implies that the
- # internal utf-8 is written)
-
+test io-52.10 {TclCopyChannel & encodings} -constraints fcopy -body {
set in [open $path(kyrillic.txt) r]
set out [open $path(utf8-fcopy.txt) w]
@@ -7516,7 +7513,8 @@ test io-52.10 {TclCopyChannel & encodings} {fcopy notWinCI} {
close $out
file size $path(utf8-fcopy.txt)
-} 5
+} -returnCodes 1 -match glob -result {error writing "*":\
+ invalid or incomplete multibyte or wide character}
test io-52.11 {TclCopyChannel & encodings} -setup {
set out [open $path(utf8-fcopy.txt) w]
fconfigure $out -encoding utf-8 -translation lf -profile strict
@@ -8374,7 +8372,7 @@ test io-53.13 {TclCopyChannel: read error reporting} -setup {
catch {close $out}
removeFile out
rename driver {}
-} -result {error reading "*": *} -returnCodes error -match glob
+} -result {error reading "rc*": *} -returnCodes error -match glob
test io-53.14 {TclCopyChannel: write error reporting} -setup {
proc driver {cmd args} {
variable buffer
@@ -9264,7 +9262,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup {
removeFile io-75.5
} -result 4181
-test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup {
+test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -setup {
set fn [makeFile {} io-75.6]
set f [open $fn w+]
fconfigure $f -encoding binary
@@ -9278,7 +9276,8 @@ test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -set
} -cleanup {
close $f
removeFile io-75.6
-} -match glob -returnCodes 1 -result {error reading "*": invalid or incomplete multibyte or wide character}
+} -match glob -returnCodes 1 -result {error reading "file*":\
+ invalid or incomplete multibyte or wide character}
test io-75.7 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup {
set fn [makeFile {} io-75.7]
@@ -9294,7 +9293,8 @@ test io-75.7 {invalid utf-8 encoding gets is not ignored (-profile strict)} -set
} -cleanup {
close $f
removeFile io-75.7
-} -match glob -returnCodes 1 -result {error reading "*": invalid or incomplete multibyte or wide character}
+} -match glob -returnCodes 1 -result {error reading "file*":\
+ invalid or incomplete multibyte or wide character}
test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup {
set fn [makeFile {} io-75.8]
@@ -9330,10 +9330,11 @@ test io-75.9 {unrepresentable character write passes and is replaced by ?} -setu
removeFile io-75.9
} -match glob -result [list {A} {error writing "*": invalid or incomplete multibyte or wide character}]
-# Incomplete sequence test.
-# This error may IMHO only be detected with the close.
-# But the read already returns the incomplete sequence.
-test io-75.10 {incomplete multibyte encoding read is ignored} -setup {
+test io-75.10 {
+ incomplete multibyte encoding read is not ignored because "binary" sets
+ profile to strict
+} -setup {
+ set res {}
set fn [makeFile {} io-75.10]
set f [open $fn w+]
fconfigure $f -encoding binary
@@ -9342,13 +9343,21 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup {
seek $f 0
fconfigure $f -encoding utf-8 -buffering none
} -body {
+ catch {read $f} errmsg
+ lappend res $errmsg
+ seek $f 0
+ chan configure $f -profile tcl8
set d [read $f]
binary scan $d H* hd
- set hd
+ lappend res $hd
+ return $res
} -cleanup {
close $f
removeFile io-75.10
-} -result 41c0
+ unset result
+} -match glob -result {{error reading "file*":\
+ invalid or incomplete multibyte or wide character} 41c0}
+
# The current result returns the orphan byte as byte.
# This may be expected due to special utf-8 handling.
@@ -9372,9 +9381,14 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup {
} -cleanup {
close $f
removeFile io-75.11
-} -match glob -result {41 1 {error reading "*": invalid or incomplete multibyte or wide character}}
+} -match glob -result {41 1 {error reading "file*":\
+ invalid or incomplete multibyte or wide character}}
-test io-75.12 {invalid utf-8 encoding read is ignored} -setup {
+test io-75.12 {
+ invalid utf-8 encoding read is not ignored because setting the encoding to
+ "binary" also set the profile to strict
+} -setup {
+ set res {}
set fn [makeFile {} io-75.12]
set f [open $fn w+]
fconfigure $f -encoding binary
@@ -9383,13 +9397,20 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup {
seek $f 0
fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf
} -body {
+ catch {read $f} errmsg
+ lappend res $errmsg
+ chan configure $f -profile tcl8
+ seek $f 0
set d [read $f]
binary scan $d H* hd
- set hd
+ lappend res $hd
+ return $res
} -cleanup {
close $f
removeFile io-75.12
-} -result 4181
+ unset res
+} -match glob -result {{error reading "file*":\
+ invalid or incomplete multibyte or wide character} 4181}
test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup {
set fn [makeFile {} io-75.13]
set f [open $fn w+]
@@ -9407,7 +9428,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -se
} -cleanup {
close $f
removeFile io-75.13
-} -match glob -result {41 1 {error reading "*": invalid or incomplete multibyte or wide character}}
+} -match glob -result {41 1 {error reading "file*": invalid or incomplete multibyte or wide character}}
# ### ### ### ######### ######### #########
diff --git a/tests/ioCmd.test b/tests/ioCmd.test
index cab4745..2df2ca0 100644
--- a/tests/ioCmd.test
+++ b/tests/ioCmd.test
@@ -266,7 +266,7 @@ test iocmd-8.9 {fconfigure command} -setup {
fconfigure $f1
} -cleanup {
catch {close $f1}
-} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -eofchar {} -profile tcl8 -translation lf}
+} -result {-blocking 1 -buffering none -buffersize 4040 -encoding iso8859-1 -eofchar {} -profile tcl8 -translation lf}
test iocmd-8.10 {fconfigure command} -returnCodes error -body {
fconfigure a b
} -result {can not find channel named "a"}
@@ -496,6 +496,7 @@ test iocmd-12.10 {POSIX open access modes: BINARY} {
set result
} 5
test iocmd-12.11 {POSIX open access modes: BINARY} -body {
+ after 100
set f [open $path(test1) {WRONLY BINARY TRUNC}]
puts $f Ɉ ;# throws an exception
} -cleanup {
diff --git a/tests/zlib.test b/tests/zlib.test
index 720fdd6..93c568b 100644
--- a/tests/zlib.test
+++ b/tests/zlib.test
@@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup {
} -cleanup {
catch {close $fd}
removeFile $file
-} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf}}
+} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf}}
test zlib-8.7 {transformation and fconfigure} -setup {
set file [makeFile {} test.gz]
set fd [open $file wb]
@@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup {
} -cleanup {
catch {close $fd}
removeFile $file
-} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf}}
+} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding iso8859-1 -eofchar {} -profile strict -translation lf}}
# Input is headers from fetching SPDY draft
# Dictionary is that which is proposed _in_ SPDY draft
set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n"