summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2023-11-18 23:32:02 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2023-11-18 23:32:02 (GMT)
commitc3de06ef3212fee8a8d4b4fed7ec8b2fc2b8273d (patch)
treef5e5bcf0876ff01fb37f0887d002826d0316f8de
parent6a9b725dfc4ae4fdcd33bbb1273d05df6aae843b (diff)
parent1deb9621afce1231e6e9b5ab580ec57fb08eea6c (diff)
downloadtcl-c3de06ef3212fee8a8d4b4fed7ec8b2fc2b8273d.zip
tcl-c3de06ef3212fee8a8d4b4fed7ec8b2fc2b8273d.tar.gz
tcl-c3de06ef3212fee8a8d4b4fed7ec8b2fc2b8273d.tar.bz2
TIP #653: Handle consumed data of channel commands in case of encoding errors
-rw-r--r--doc/read.n15
-rw-r--r--generic/tclIO.c29
-rw-r--r--generic/tclIOCmd.c10
-rw-r--r--generic/tclInt.h1
-rw-r--r--tests/io.test49
5 files changed, 74 insertions, 30 deletions
diff --git a/doc/read.n b/doc/read.n
index 2add683..7c0c155 100644
--- a/doc/read.n
+++ b/doc/read.n
@@ -61,13 +61,15 @@ An encoding error is reported by the POSIX error code \fBEILSEQ\fR.
In blocking mode, the error is directly thrown, even, if there is a
leading decodable data portion.
The file pointer is advanced just before the encoding error.
-An eventual well decoded data chunk before the encoding error is lost.
-It is proposed to return this portion within the additional key \fB-data\fR
-in the error dictionary.
+An eventual well decoded data chunk before the encoding error is returned
+in the error option dictionary key \fB-data\fR.
+The value of the key contains the empty string, if the error arises at the
+first data position.
.PP
In non blocking mode, first, any data without encoding error is returned
(without error state).
In the next call, no data is returned and the \fBEILSEQ\fR error state is set.
+The key \fB-data\fR is not present.
.PP
Here is an example with an encoding error in UTF-8 encoding, which is then
introspected by a switch to the binary encoding. The test file contains a not
@@ -87,7 +89,7 @@ file35a65a0
% catch {read $f} e d
1
% set d
--code 1 -level 0
+-data A -code 1 -level 0
-errorstack {INNER {invokeStk1 read file35a65a0}}
-errorcode {POSIX EILSEQ {invalid or incomplete multibyte or wide character}}
-errorinfo {...} -errorline 1
@@ -98,6 +100,11 @@ file35a65a0
ÃB
% close $f
.CE
+The already decoded data "A" is returned in the error options dictionary key
+\fB-data\fR.
+The file position is advanced on the encoding error position 1.
+The data at the error position is thus recovered by the next \fBread\fR command.
+.PP
Non blocking example
.
.CS
diff --git a/generic/tclIO.c b/generic/tclIO.c
index bc1b1c6..0047f0b 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -4934,7 +4934,7 @@ Tcl_GetsObj(
* two times, as gs.bytesWrote is not 0 on the first pass. This feels
* once to much, as the data is anyway not used.
*/
-
+
/* Set eol to the position that caused the encoding error, and then
* continue to gotEOL, which stores the data that was decoded
* without error to objPtr. This allows the caller to do something
@@ -7616,6 +7616,33 @@ Tcl_Eof(
return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0;
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclChannelGetBlockingMode --
+ *
+ * Returns 1 if the channel is in blocking mode (default), 0 otherwise.
+ *
+ * Results:
+ * 1 or 0, always.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclChannelGetBlockingMode(
+ Tcl_Channel chan)
+{
+ ChannelState *statePtr = ((Channel *) chan)->state;
+ /* State of real channel structure. */
+
+ return GotFlag(statePtr, CHANNEL_NONBLOCKING) ? 0 : 1;
+}
+
/*
*----------------------------------------------------------------------
*
diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c
index 938fa01..9667419 100644
--- a/generic/tclIOCmd.c
+++ b/generic/tclIOCmd.c
@@ -459,7 +459,12 @@ Tcl_ReadObjCmd(
TclChannelPreserve(chan);
charactersRead = Tcl_ReadChars(chan, resultPtr, toRead, 0);
if (charactersRead == TCL_IO_FAILURE) {
- Tcl_DecrRefCount(resultPtr);
+ Tcl_Obj *returnOptsPtr = NULL;
+ if (TclChannelGetBlockingMode(chan)) {
+ returnOptsPtr = Tcl_NewDictObj();
+ Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-data", -1),
+ resultPtr);
+ }
/*
* TIP #219.
* Capture error messages put by the driver into the bypass area and
@@ -473,6 +478,9 @@ Tcl_ReadObjCmd(
TclGetString(chanObjPtr), Tcl_PosixError(interp)));
}
TclChannelRelease(chan);
+ if (returnOptsPtr) {
+ Tcl_SetReturnOptions(interp, returnOptsPtr);
+ }
return TCL_ERROR;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 3d8a702..f696ad2 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3112,6 +3112,7 @@ MODULE_SCOPE int TclByteArrayMatch(const unsigned char *string,
MODULE_SCOPE double TclCeil(const void *a);
MODULE_SCOPE void TclChannelPreserve(Tcl_Channel chan);
MODULE_SCOPE void TclChannelRelease(Tcl_Channel chan);
+MODULE_SCOPE int TclChannelGetBlockingMode(Tcl_Channel chan);
MODULE_SCOPE int TclCheckArrayTraces(Tcl_Interp *interp, Var *varPtr,
Var *arrayPtr, Tcl_Obj *name, int index);
MODULE_SCOPE int TclCheckBadOctal(Tcl_Interp *interp,
diff --git a/tests/io.test b/tests/io.test
index 341eee0..724c65a 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -9303,13 +9303,13 @@ test io-75.7 {
fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \
-profile strict
} -body {
- list [catch {read $f} msg] $msg
+ list [catch {read $f} msg data] $msg [dict get $data -data]
} -cleanup {
close $f
removeFile io-75.7
- unset msg f fn
+ unset msg data f fn
} -match glob -result {1 {error reading "file*":\
- invalid or incomplete multibyte or wide character}}
+ invalid or incomplete multibyte or wide character} A}
test io-75.8 {invalid utf-8 encoding eof first handling (-profile strict)} -setup {
set fn [makeFile {} io-75.8]
@@ -9327,10 +9327,11 @@ test io-75.8 {invalid utf-8 encoding eof first handling (-profile strict)} -setu
binary scan $d H* hd
lappend hd [eof $f]
lappend hd [read $f]
- close $f
set hd
} -cleanup {
+ close $f
removeFile io-75.8
+ unset f d hd
} -result {41 1 {}}
test io-75.8.eoflater {invalid utf-8 encoding eof after handling (-profile strict)} -setup {
@@ -9345,17 +9346,17 @@ test io-75.8.eoflater {invalid utf-8 encoding eof after handling (-profile stric
fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \
-translation lf -profile strict
} -body {
- set res [list [catch {read $f} msg] [eof $f]]
+ set res [list [catch {read $f} msg data] [eof $f] [dict get $data -data]]
chan configure $f -encoding iso8859-1
lappend res [read $f 1]
chan configure $f -encoding utf-8
- lappend res [catch {read $f 1} msg] $msg
+ lappend res [catch {read $f 1} msg data] $msg [dict get $data -data]
} -cleanup {
close $f
removeFile io-75.8
- unset res msg fn f
-} -match glob -result "1 0 \x81 1 {error reading \"*\":\
- invalid or incomplete multibyte or wide character}"
+ unset res msg data fn f
+} -match glob -result "1 0 A \x81 1 {error reading \"*\":\
+ invalid or incomplete multibyte or wide character} {}"
test io-strict-multibyte-eof {
@@ -9370,12 +9371,12 @@ test io-strict-multibyte-eof {
seek $chan 0
chan configure $chan -encoding utf-8 -profile strict
} -body {
- list [catch {read $chan 1} msg] $msg
+ list [catch {read $chan 1} msg data] $msg [dict get $data -data]
} -cleanup {
close $chan
- unset msg chan
+ unset msg chan data
} -match glob -result {1 {error reading "*":\
- invalid or incomplete multibyte or wide character}}
+ invalid or incomplete multibyte or wide character} {}}
test io-75.9 {unrepresentable character write throws error in strict profile} -setup {
set fn [makeFile {} io-75.9]
@@ -9432,13 +9433,13 @@ test io-75.11 {shiftjis encoding error read results in error (strict profile)} -
} -body {
set d [read $f]
binary scan $d H* hd
- lappend hd [catch {set d [read $f]} msg] $msg
+ lappend hd [catch {set d [read $f]} msg data] $msg [dict exists $data -data]
} -cleanup {
close $f
removeFile io-75.11
- unset d hd msg f
+ unset d hd msg data f
} -match glob -result {41 1 {error reading "file*":\
- invalid or incomplete multibyte or wide character}}
+ invalid or incomplete multibyte or wide character} 0}
test io-75.12 {invalid utf-8 encoding read is ignored} -setup {
set fn [makeFile {} io-75.12]
@@ -9473,13 +9474,13 @@ test io-75.13 {
} -body {
set d [read $f]
binary scan $d H* hd
- lappend hd [catch {read $f} msg] $msg
+ lappend hd [catch {read $f} msg data] $msg [dict exists $data -data]
} -cleanup {
close $f
removeFile io-75.13
- unset d hd msg f fn
+ unset d hd msg data f fn
} -match glob -result {41 1 {error reading "file*":\
- invalid or incomplete multibyte or wide character}}
+ invalid or incomplete multibyte or wide character} 0}
test io-75.14 {
[gets] succesfully returns lines prior to error
@@ -9497,16 +9498,16 @@ test io-75.14 {
} -body {
set res [gets $chan]
lappend res [gets $chan]
- lappend res [catch {gets $chan} msg] $msg
+ lappend res [catch {gets $chan} msg data] $msg [dict exists $data -data]
chan configure $chan -profile tcl8
lappend res [gets $chan]
lappend res [gets $chan]
return $res
} -cleanup {
close $chan
- unset chan res msg
+ unset chan res msg data
} -match glob -result {a b 1 {error reading "*":\
- invalid or incomplete multibyte or wide character} cÀ d}
+ invalid or incomplete multibyte or wide character} 0 cÀ d}
test io-75.15 {
invalid utf-8 encoding strict
@@ -9524,8 +9525,8 @@ test io-75.15 {
fconfigure $chan -encoding utf-8 -profile strict
lappend res [gets $chan]
lappend res [gets $chan]
- lappend res [catch {gets $chan} msg] $msg
- lappend res [catch {gets $chan} msg] $msg
+ lappend res [catch {gets $chan} msg data] $msg [dict exists $data -data]
+ lappend res [catch {gets $chan} msg data] $msg [dict exists $data -data]
chan configure $chan -translation binary
set data [read $chan 4]
foreach char [split $data {}] {
@@ -9540,7 +9541,7 @@ test io-75.15 {
close $chan
unset chan res msg data
} -match glob -result {hello AB 1 {error reading "*": invalid or incomplete multibyte or wide character}\
- 1 {error reading "*": invalid or incomplete multibyte or wide character} 43 44 c0 40 EF GHI}
+ 0 1 {error reading "*": invalid or incomplete multibyte or wide character} 0 43 44 c0 40 EF GHI}
# ### ### ### ######### ######### #########