From a17f5d02f8c3ec8babbb27325cba2039e56f1f10 Mon Sep 17 00:00:00 2001 From: andreask Date: Thu, 5 Jun 2014 00:22:59 +0000 Subject: Fixed a tricky interaction of IO system and encodings which could result in a panic. The relevant function is ReadChars() (short RC in the following). When the encoding and translation transforms deliver more characters than were requested the iterative algorithm used by RC reduces the value of "dstLimit" (= the number of bytes allowed to be copied into the destination buffer) to force the next round to deliver less characters, hopefully the number requested. The existing code used the byte located just after the last wanted character to determine the new limit. The resulting value could _undershoot_ the best possible limit because Tcl_ExternalToUtf would effectively reduce this limit further, by TCL_UTF_MAX+1, to have enough space for a single multi-byte character in the buffer, and a closing '\0' as well. One effect of this were additional calls to ReadChars() to retrieve the characters missed by a call with an undershot limit. In the limit (sic) however this was also able to cause a full-blown "Buffer Underflow" panic if the original request was for less than TCL_UTF_MAX characters (*), and we are using a single-byte encoding like iso-8859-1. Because then the undershot dstLimit would prevent the next round from copying anything, and causing it to try and consolidate the current buffer with the next buffer, thinking that it had to merge a multi-byte character split across buffer boundaries. (Ad *) For example because the previous call had undershot already and left only such a small amount of characters behind! The basic fix to the problem is to add TCL_UTF_MAX back to the limit, like is done in all the (three) other places in RC setting a new one. Note however that this naive fix may generate a new limit which is the same as the old, or possibly larger. If that happens we act very conservatively and reduce the limit by only one byte instead. While I believe that this last conservative approach will never reduce the limit to TCL_UTF_MAX or less before reaching a state where it returnds the exact amount of requested characters I still added a check against this situation anyway, causing a new panic if triggered. --- generic/tclIO.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index b7135e9..e414668 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5596,9 +5596,27 @@ ReadChars( /* * We read more chars than allowed. Reset limits to * prevent that and try again. + * + * Note how we are adding back TCL_UTF_MAX to ensure that the + * Tcl_External2Utf invoked by the next round will have enough + * space in the destination for at least one multi-byte + * character. Without that nothing will be copied and the system + * will try to consolidate the entire current and next buffer, + * likely triggering the "Buffer Underflow" panic. */ - dstLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) - dst; + int newLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) - dst + TCL_UTF_MAX; + + if (newLimit >= dstLimit) { + dstLimit --; + } else { + dstLimit = newLimit; + } + + if (dstLimit <= TCL_UTF_MAX) { + Tcl_Panic ("Not enough space left for a single multi-byte character."); + } + statePtr->flags = savedFlags; statePtr->inputEncodingFlags = savedIEFlags; statePtr->inputEncodingState = savedState; @@ -5661,7 +5679,8 @@ ReadChars( */ if (nextPtr->nextRemoved - srcLen < 0) { - Tcl_Panic("Buffer Underflow, BUFFER_PADDING not enough"); + Tcl_Panic("Buffer Underflow, BUFFER_PADDING not enough (%d < %d)", + nextPtr->nextRemoved, srcLen); } nextPtr->nextRemoved -= srcLen; -- cgit v0.12 From 5e12990261375322a279069b4bc5110233068335 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 5 Jun 2014 15:17:29 +0000 Subject: When too many chars are read by ReadChars() and we trim the limits to get it right on the next pass, don't forget the TCL_UTF_MAX padding demanded by Tcl_ExternalToUtf(). (Thanks for finding that, aku!) Fix the factorPtr management. It was just totaly wrong. The factor should be a ratio of the record of bytes read to the record of chars read. With those fixes, new test io-12.6 covers the "too many chars" code. --- generic/tclIO.c | 15 +++++++++++---- tests/io.test | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index b7135e9..308e7a9 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5595,10 +5595,13 @@ ReadChars( /* * We read more chars than allowed. Reset limits to - * prevent that and try again. + * prevent that and try again. Don't forget the extra + * padding of TCL_UTF_MAX - 1 bytes demanded by the + * Tcl_ExternalToUtf() call! */ - dstLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) - dst; + dstLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) + + TCL_UTF_MAX - 1 - dst; statePtr->flags = savedFlags; statePtr->inputEncodingFlags = savedIEFlags; statePtr->inputEncodingState = savedState; @@ -5676,8 +5679,12 @@ ReadChars( consume: bufPtr->nextRemoved += srcRead; - if (dstWrote > srcRead + 1) { - *factorPtr = dstWrote * UTF_EXPANSION_FACTOR / srcRead; + /* + * If this read contained multibyte characters, revise factorPtr + * so the next read will allocate bigger buffers. + */ + if (numChars && numChars < srcRead) { + *factorPtr = srcRead * UTF_EXPANSION_FACTOR / numChars; } Tcl_SetObjLength(objPtr, numBytes + dstWrote); return numChars; diff --git a/tests/io.test b/tests/io.test index f1248b9..a1625ba 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1445,6 +1445,39 @@ test io-12.5 {ReadChars: fileevents on partial characters} {stdio openpipe filee lappend x [catch {close $f} msg] $msg set x } "{} timeout {} timeout \u7266 {} eof 0 {}" +test io-12.6 {ReadChars: too many chars read} { + proc driver {cmd args} { + variable buffer + variable index + set chan [lindex $args 0] + switch -- $cmd { + initialize { + set index($chan) 0 + set buffer($chan) [encoding convertto utf-8 \ + [string repeat \uBEEF 20][string repeat . 20]] + return {initialize finalize watch read} + } + finalize { + unset index($chan) buffer($chan) + return + } + watch {} + read { + set n [lindex $args 1] + set new [expr {$index($chan) + $n}] + set result [string range $buffer($chan) $index($chan) $new-1] + set index($chan) $new + return $result + } + } + } + set c [chan create read [namespace which driver]] + chan configure $c -encoding utf-8 + while {![eof $c]} { + read $c 15 + } + close $c +} {} test io-13.1 {TranslateInputEOL: cr mode} {} { set f [open $path(test1) w] -- cgit v0.12