From a17f5d02f8c3ec8babbb27325cba2039e56f1f10 Mon Sep 17 00:00:00 2001
From: andreask <andreask>
Date: Thu, 5 Jun 2014 00:22:59 +0000
Subject: Fixed a tricky interaction of IO system and encodings which could
 result in a panic.

The relevant function is ReadChars() (short RC in the following).

When the encoding and translation transforms deliver more characters
than were requested, the iterative algorithm used by RC reduces the
value of "dstLimit" (= the number of bytes allowed to be copied into
the destination buffer) to force the next round to deliver fewer
characters, hopefully the number requested.

The existing code used the byte located just after the last wanted
character to determine the new limit. The resulting value could
_undershoot_ the best possible limit because Tcl_ExternalToUtf would
effectively reduce this limit further, by TCL_UTF_MAX+1, to have
enough space for a single multi-byte character in the buffer, and a
closing '\0' as well.

One effect of this was additional calls to ReadChars() to retrieve
the characters missed by a call with an undershot limit.

In the limit (sic) however this was also able to cause a full-blown
"Buffer Underflow" panic if the original request was for fewer than
TCL_UTF_MAX characters (*), and we are using a single-byte encoding
like iso-8859-1. The undershot dstLimit would then prevent the next
round from copying anything, causing it to try to consolidate the
current buffer with the next buffer, thinking that it had to merge a
multi-byte character split across buffer boundaries.

(Ad *) For example because the previous call had undershot already and
left only such a small number of characters behind!

The basic fix to the problem is to add TCL_UTF_MAX back to the limit,
as is done in all the (three) other places in RC that set a new
one. Note however that this naive fix may generate a new limit which
is the same as the old, or possibly larger. If that happens we act
very conservatively and reduce the limit by only one byte instead.

While I believe that this last conservative approach will never reduce
the limit to TCL_UTF_MAX or less before reaching a state where it
returns the exact amount of requested characters, I still added a
check against this situation anyway, causing a new panic if triggered.
---
 generic/tclIO.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/generic/tclIO.c b/generic/tclIO.c
index b7135e9..e414668 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -5596,9 +5596,27 @@ ReadChars(
 	    /* 
 	     * We read more chars than allowed.  Reset limits to
 	     * prevent that and try again.
+             *
+             * Note how we are adding back TCL_UTF_MAX to ensure that the
+             * Tcl_ExternalToUtf invoked by the next round will have enough
+             * space in the destination for at least one multi-byte
+             * character. Without that nothing will be copied and the system
+             * will try to consolidate the entire current and next buffer,
+             * likely triggering the "Buffer Underflow" panic.
 	     */
 
-	    dstLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) - dst;
+            int newLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) - dst + TCL_UTF_MAX;
+
+            if (newLimit >= dstLimit) {
+                dstLimit --;
+            } else {
+                dstLimit = newLimit;
+            }
+
+            if (dstLimit <= TCL_UTF_MAX) {
+                Tcl_Panic ("Not enough space left for a single multi-byte character.");
+            }
+
 	    statePtr->flags = savedFlags;
 	    statePtr->inputEncodingFlags = savedIEFlags;
 	    statePtr->inputEncodingState = savedState;
@@ -5661,7 +5679,8 @@ ReadChars(
 	     */
 
 	    if (nextPtr->nextRemoved - srcLen < 0) {
-		Tcl_Panic("Buffer Underflow, BUFFER_PADDING not enough");
+		Tcl_Panic("Buffer Underflow, BUFFER_PADDING not enough (%d < %d)",
+                          nextPtr->nextRemoved, srcLen);
 	    }
 
 	    nextPtr->nextRemoved -= srcLen;
-- 
cgit v0.12


From 5e12990261375322a279069b4bc5110233068335 Mon Sep 17 00:00:00 2001
From: dgp <dgp@users.sourceforge.net>
Date: Thu, 5 Jun 2014 15:17:29 +0000
Subject: When too many chars are read by ReadChars() and we trim the limits to
 get it right on the next pass, don't forget the TCL_UTF_MAX padding demanded
 by Tcl_ExternalToUtf().  (Thanks for finding that, aku!) Fix the factorPtr
 management.  It was just totally wrong. The factor should be a ratio of the
 record of bytes read to the record of chars read. With those fixes, new test
 io-12.6 covers the "too many chars" code.

---
 generic/tclIO.c | 15 +++++++++++----
 tests/io.test   | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/generic/tclIO.c b/generic/tclIO.c
index b7135e9..308e7a9 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -5595,10 +5595,13 @@ ReadChars(
 
 	    /* 
 	     * We read more chars than allowed.  Reset limits to
-	     * prevent that and try again.
+	     * prevent that and try again.  Don't forget the extra
+	     * padding of TCL_UTF_MAX - 1 bytes demanded by the
+	     * Tcl_ExternalToUtf() call!
 	     */
 
-	    dstLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) - dst;
+	    dstLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) 
+		    + TCL_UTF_MAX - 1 - dst;
 	    statePtr->flags = savedFlags;
 	    statePtr->inputEncodingFlags = savedIEFlags;
 	    statePtr->inputEncodingState = savedState;
@@ -5676,8 +5679,12 @@ ReadChars(
 
     consume:
 	bufPtr->nextRemoved += srcRead;
-	if (dstWrote > srcRead + 1) {
-	    *factorPtr = dstWrote * UTF_EXPANSION_FACTOR / srcRead;
+	/*
+	 * If this read contained multibyte characters, revise factorPtr
+	 * so the next read will allocate bigger buffers.
+	 */
+	if (numChars && numChars < srcRead) {
+	    *factorPtr = srcRead * UTF_EXPANSION_FACTOR / numChars;
 	}
 	Tcl_SetObjLength(objPtr, numBytes + dstWrote);
 	return numChars;
diff --git a/tests/io.test b/tests/io.test
index f1248b9..a1625ba 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -1445,6 +1445,39 @@ test io-12.5 {ReadChars: fileevents on partial characters} {stdio openpipe filee
     lappend x [catch {close $f} msg] $msg
     set x
 } "{} timeout {} timeout \u7266 {} eof 0 {}"
+test io-12.6 {ReadChars: too many chars read} {
+    proc driver {cmd args} {
+        variable buffer
+        variable index
+        set chan [lindex $args 0]
+        switch -- $cmd {
+            initialize {
+                set index($chan) 0
+                set buffer($chan) [encoding convertto utf-8 \
+                        [string repeat \uBEEF 20][string repeat . 20]]
+                return {initialize finalize watch read}
+            }
+            finalize {
+                unset index($chan) buffer($chan)
+                return
+            }
+            watch {}
+            read {
+                set n [lindex $args 1]
+                set new [expr {$index($chan) + $n}]
+                set result [string range $buffer($chan) $index($chan) $new-1]
+                set index($chan) $new
+                return $result
+            }
+        }
+    }
+    set c [chan create read [namespace which driver]]
+    chan configure $c -encoding utf-8
+    while {![eof $c]} {
+        read $c 15
+    }
+    close $c
+} {}
 
 test io-13.1 {TranslateInputEOL: cr mode} {} {
     set f [open $path(test1) w]
-- 
cgit v0.12