summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author dgp <dgp@users.sourceforge.net> 2015-04-24 20:17:12 (GMT)
committer dgp <dgp@users.sourceforge.net> 2015-04-24 20:17:12 (GMT)
commit 86985bc264038f13e0e7ee2168047a97aec92464 (patch)
tree 8683c744d7b2bc7e8da6b207a43b7ca4e574a861
parent d5453f4a90c64ff9ea80c89e8e7c4b51e7f26d16 (diff)
parent 699e28e6b71c712ca43904df96a044f8c9594ec8 (diff)
download tcl-86985bc264038f13e0e7ee2168047a97aec92464.zip
tcl-86985bc264038f13e0e7ee2168047a97aec92464.tar.gz
tcl-86985bc264038f13e0e7ee2168047a97aec92464.tar.bz2
[879a0747be] Repair handling of incomplete multi-byte chars at the ends of input buffers. Also properly reset input encoding flags when cycling through a fleeting EOF condition.
-rw-r--r-- generic/tclIO.c 59
-rw-r--r-- tests/io.test 52
2 files changed, 105 insertions, 6 deletions
diff --git a/generic/tclIO.c b/generic/tclIO.c
index c4757ea..e46a22a 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -421,7 +421,11 @@ ChanRead(
* Each read op must set the blocked and eof states anew, not let
* the effect of prior reads leak through.
*/
+ if (GotFlag(chanPtr->state, CHANNEL_EOF)) {
+ chanPtr->state->inputEncodingFlags |= TCL_ENCODING_START;
+ }
ResetFlag(chanPtr->state, CHANNEL_BLOCKED | CHANNEL_EOF);
+ chanPtr->state->inputEncodingFlags &= ~TCL_ENCODING_END;
if (WillRead(chanPtr) < 0) {
return -1;
}
@@ -430,7 +434,11 @@ ChanRead(
dst, dstSize, &result);
/* Stop any flag leakage through stacked channel levels */
+ if (GotFlag(chanPtr->state, CHANNEL_EOF)) {
+ chanPtr->state->inputEncodingFlags |= TCL_ENCODING_START;
+ }
ResetFlag(chanPtr->state, CHANNEL_BLOCKED | CHANNEL_EOF);
+ chanPtr->state->inputEncodingFlags &= ~TCL_ENCODING_END;
if (bytesRead > 0) {
/*
* If we get a short read, signal up that we may be BLOCKED.
@@ -5739,7 +5747,11 @@ DoReadChars(
/* Special handling for zero-char read request. */
if (toRead == 0) {
+ if (GotFlag(statePtr, CHANNEL_EOF)) {
+ statePtr->inputEncodingFlags |= TCL_ENCODING_START;
+ }
ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF);
+ statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
UpdateInterest(chanPtr);
return 0;
}
@@ -5752,7 +5764,11 @@ DoReadChars(
TclChannelPreserve((Tcl_Channel)chanPtr);
/* Must clear the BLOCKED|EOF flags here since we check before reading */
+ if (GotFlag(statePtr, CHANNEL_EOF)) {
+ statePtr->inputEncodingFlags |= TCL_ENCODING_START;
+ }
ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF);
+ statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
for (copied = 0; (unsigned) toRead > 0; ) {
copiedNow = -1;
if (statePtr->inQueueHead != NULL) {
@@ -6004,12 +6020,24 @@ ReadChars(
/*
* Perform the encoding transformation. Read no more than
* srcLen bytes, write no more than dstLimit bytes.
+ *
+ * Some trickiness with encoding flags here. We do not want
+ * the end of a buffer to be treated as the end of all input
+ * when bytes in a next buffer are already
+ * known to exist. This is checked with an assert() because
+ * so far no test case causing the assertion to be false has
+ * been created. The normal operations of channel reading
+ * appear to cause EOF and TCL_ENCODING_END setting to occur
+ * only in situations where there are no further bytes in
+ * any buffers.
*/
+ assert(bufPtr->nextPtr == NULL || BytesLeft(bufPtr->nextPtr) == 0
+ || (statePtr->inputEncodingFlags & TCL_ENCODING_END) == 0);
+
code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
- flags & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END),
- &statePtr->inputEncodingState, dst, dstLimit, &srcRead,
- &dstDecoded, &numChars);
+ flags, &statePtr->inputEncodingState,
+ dst, dstLimit, &srcRead, &dstDecoded, &numChars);
/*
* Perform the translation transformation in place. Read no more
@@ -6126,9 +6154,12 @@ ReadChars(
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;
+ assert(bufPtr->nextPtr == NULL
+ || BytesLeft(bufPtr->nextPtr) == 0 || 0 ==
+ (statePtr->inputEncodingFlags & TCL_ENCODING_END));
+
Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
- (statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE)
- & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END),
+ (statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE),
&statePtr->inputEncodingState, buffer, TCL_UTF_MAX + 1,
&read, &decoded, &count);
@@ -6503,9 +6534,13 @@ Tcl_Ungets(
/*
* Clear the EOF flags, and clear the BLOCKED bit.
*/
-
+
+ if (GotFlag(statePtr, CHANNEL_EOF)) {
+ statePtr->inputEncodingFlags |= TCL_ENCODING_START;
+ }
ResetFlag(statePtr,
CHANNEL_BLOCKED | CHANNEL_STICKY_EOF | CHANNEL_EOF | INPUT_SAW_CR);
+ statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
bufPtr = AllocChannelBuffer(len);
memcpy(InsertPoint(bufPtr), str, (size_t) len);
@@ -6875,8 +6910,12 @@ Tcl_Seek(
* point. Also clear CR related flags.
*/
+ if (GotFlag(statePtr, CHANNEL_EOF)) {
+ statePtr->inputEncodingFlags |= TCL_ENCODING_START;
+ }
ResetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF | CHANNEL_BLOCKED |
INPUT_SAW_CR);
+ statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
/*
* If the channel is in asynchronous output mode, switch it back to
@@ -7970,7 +8009,11 @@ Tcl_SetChannelOption(
* ahead'. Ditto for blocked.
*/
+ if (GotFlag(statePtr, CHANNEL_EOF)) {
+ statePtr->inputEncodingFlags |= TCL_ENCODING_START;
+ }
ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED);
+ statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
return TCL_OK;
} else if (HaveOpt(1, "-translation")) {
const char *readMode, *writeMode;
@@ -9666,7 +9709,11 @@ DoRead(
/* Special handling for zero-char read request. */
if (bytesToRead == 0) {
+ if (GotFlag(statePtr, CHANNEL_EOF)) {
+ statePtr->inputEncodingFlags |= TCL_ENCODING_START;
+ }
ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF);
+ statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
UpdateInterest(chanPtr);
return 0;
}
diff --git a/tests/io.test b/tests/io.test
index 06ae81d..4e1af72 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -1517,6 +1517,39 @@ test io-12.7 {ReadChars: too many chars read [bc5b790099]} {
}
close $c
} {}
+test io-12.8 {ReadChars: multibyte chars split} {
+ set f [open $path(test1) w]
+ fconfigure $f -translation binary
+ puts -nonewline $f [string repeat a 9]\xc2\xa0
+ close $f
+ set f [open $path(test1)]
+ fconfigure $f -encoding utf-8 -buffersize 10
+ set in [read $f]
+ close $f
+ scan [string index $in end] %c
+} 160
+test io-12.9 {ReadChars: multibyte chars split} {
+ set f [open $path(test1) w]
+ fconfigure $f -translation binary
+ puts -nonewline $f [string repeat a 9]\xc2
+ close $f
+ set f [open $path(test1)]
+ fconfigure $f -encoding utf-8 -buffersize 10
+ set in [read $f]
+ close $f
+ scan [string index $in end] %c
+} 194
+test io-12.10 {ReadChars: multibyte chars split} {
+ set f [open $path(test1) w]
+ fconfigure $f -translation binary
+ puts -nonewline $f [string repeat a 9]\xc2
+ close $f
+ set f [open $path(test1)]
+ fconfigure $f -encoding utf-8 -buffersize 11
+ set in [read $f]
+ close $f
+ scan [string index $in end] %c
+} 194
test io-13.1 {TranslateInputEOL: cr mode} {} {
set f [open $path(test1) w]
@@ -8542,6 +8575,25 @@ test io-73.4 {[5adc350683] [read] after EOF} -setup {
} -result {1 1 {more data
} 1}
+test io-73.5 {effect of eof on encoding end flags} -setup {
+ set fn [makeFile {} io-73.5]
+ set rfd [open $fn r]
+ set wfd [open $fn a]
+ chan configure $wfd -buffering none -translation binary
+ chan configure $rfd -buffersize 5 -encoding utf-8
+ read $rfd
+} -body {
+ set result [eof $rfd]
+ puts -nonewline $wfd "more\u00c2\u00a0data"
+ lappend result [eof $rfd]
+ lappend result [read $rfd]
+ lappend result [eof $rfd]
+} -cleanup {
+ close $wfd
+ close $rfd
+ removeFile io-73.5
+} -result [list 1 1 more\u00a0data 1]
+
# ### ### ### ######### ######### #########
# cleanup