From 3fe923f507658ef0f1f26740f711135dc869f84f Mon Sep 17 00:00:00 2001 From: andreas_kupries Date: Wed, 5 Apr 2006 00:05:53 +0000 Subject: * generic/tclIO.c (ReadChars): Added check and panic and commentary to a piece of code which relies on BUFFER_PADDING to create enough space at the beginning of each buffer forthe insertion of partial multi-byte data at the beginning of a buffer. To explain why this code is ok, and as precaution if someone twiddled the BUFFER_PADDING into uselessness. * generic/tclIO.c (ReadChars): [SF Tcl Bug 1462248]. Added code temporarily suppress the use of TCL_ENCODING_END set when eof was reached while the buffer we are converting is not truly the last buffer in the queue. together with the Utf bug below it was possible to completely bollox the buffer data structures, eventually crashing Tcl. * generic/tclEncoding.c (UtfToUtfProc): Fixed problem where the function accessed memory beyond the end of the input buffer. When TCL_ENCODING_END is set and the last bytes of the buffer start a multi-byte sequence. This bug contributed to [SF Tcl Bug 1462248]. --- ChangeLog | 22 ++++++++++++++ generic/tclEncoding.c | 14 +++++++-- generic/tclIO.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 111 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3f1b105..9ace770 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +2006-04-03 Andreas Kupries + + * generic/tclIO.c (ReadChars): Added check and panic and + commentary to a piece of code which relies on BUFFER_PADDING to + create enough space at the beginning of each buffer forthe + insertion of partial multi-byte data at the beginning of a + buffer. To explain why this code is ok, and as precaution if + someone twiddled the BUFFER_PADDING into uselessness. + + * generic/tclIO.c (ReadChars): [SF Tcl Bug 1462248]. Added code + temporarily suppress the use of TCL_ENCODING_END set when eof + was reached while the buffer we are converting is not truly the + last buffer in the queue. together with the Utf bug below it was + possible to completely bollox the buffer data structures, + eventually crashing Tcl. + + * generic/tclEncoding.c (UtfToUtfProc): Fixed problem where the + function accessed memory beyond the end of the input + buffer. When TCL_ENCODING_END is set and the last bytes of the + buffer start a multi-byte sequence. This bug contributed to [SF + Tcl Bug 1462248]. + 2006-03-28 Jeff Hobbs * win/configure, win/tcl.m4: define MACHINE for gcc builds as well. diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 5fa799e..2a3698f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclEncoding.c,v 1.16.2.9 2006/03/13 20:57:09 dgp Exp $ + * RCS: @(#) $Id: tclEncoding.c,v 1.16.2.10 2006/04/05 00:06:02 andreas_kupries Exp $ */ #include "tclInt.h" @@ -2083,13 +2083,23 @@ UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ *dst++ = 0; src += 2; + } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { + /* Always check before using Tcl_UtfToUniChar. Not doing + * can so cause it run beyond the endof the buffer! If we + * * happen such an incomplete char its byts are made to * + * represent themselves. + */ + + ch = (Tcl_UniChar) *src; + src += 1; + dst += Tcl_UniCharToUtf(ch, dst); } else { src += Tcl_UtfToUniChar(src, &ch); dst += Tcl_UniCharToUtf(ch, dst); } } - *srcReadPtr = src - srcStart; + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; diff --git a/generic/tclIO.c b/generic/tclIO.c index c7c2e3f..f82f648 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclIO.c,v 1.61.2.19 2006/03/10 14:12:14 vasiljevic Exp $ + * RCS: @(#) $Id: tclIO.c,v 1.61.2.20 2006/04/05 00:06:03 andreas_kupries Exp $ */ #include "tclInt.h" @@ -4727,12 +4727,13 @@ ReadChars(statePtr, objPtr, charsToRead, offsetPtr, factorPtr) ChannelBuffer *bufPtr; char *src, *dst; Tcl_EncodingState oldState; + int encEndFlagSuppressed = 0; factor = *factorPtr; offset = *offsetPtr; bufPtr = statePtr->inQueueHead; - src = bufPtr->buf + bufPtr->nextRemoved; + src = bufPtr->buf + bufPtr->nextRemoved; srcLen = bufPtr->nextAdded - bufPtr->nextRemoved; toRead = charsToRead; @@ -4776,6 +4777,54 @@ ReadChars(statePtr, objPtr, charsToRead, offsetPtr, factorPtr) } dst = objPtr->bytes + offset; + /* + * SF Tcl Bug 1462248 + * The cause of the crash reported in the referenced bug is this: + * + * - ReadChars, called with a single buffer, with a incomplete + * multi-byte character at the end (only the first byte of it). + * - Encoding translation fails, asks for more data + * - Data is read, and eof is reached, TCL_ENCODING_END (TEE) is set. + * - ReadChar is called again, converts the first buffer, but due + * to TEE it does not check for incomplete multi-byte data, and the + * character just after the end of the first buffer is a valid + * completion of the multi-byte header in the actual buffer. The + * conversion reads more characters from the buffer then present. + * This causes nextRemoved to overshoot nextAdded and the next + * reads compute a negative srcLen, cause further translations to + * fail, causing copying of data into the next buffer using bad + * arguments, causing the mecpy for to eventually fail. + * + * In the end it is a memory access bug spiraling out of control + * if the conditions are _just so_. And ultimate cause is that TEE + * is given to a conversion where it should not. TEE signals that + * this is the last buffer. Except in our case it is not. + * + * My solution is to suppress TEE if the first buffer is not the + * last. We will eventually need it given that EOF has been + * reached, but not right now. This is what the new flag + * "endEncSuppressFlag" is for. + * + * The bug in 'Tcl_Utf2UtfProc' where it read from memory behind + * the actual buffer has been fixed as well, and fixes the problem + * with the crash too, but this would still allow the generic + * layer to accidentially break a multi-byte sequence if the + * conditions are just right, because again the ExternalToUtf + * would be successful where it should not. + */ + + if ((statePtr->inputEncodingFlags & TCL_ENCODING_END) && + (bufPtr->nextPtr != NULL)) { + + /* TEE is set for a buffer which is not the last. Squash it + * for now, and restore it later, before yielding control to + * our caller. + */ + + statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; + encEndFlagSuppressed = 1; + } + oldState = statePtr->inputEncodingState; if (statePtr->flags & INPUT_NEED_NL) { /* @@ -4801,12 +4850,21 @@ ReadChars(statePtr, objPtr, charsToRead, offsetPtr, factorPtr) } statePtr->inputEncodingFlags &= ~TCL_ENCODING_START; *offsetPtr += 1; + + if (encEndFlagSuppressed) { + statePtr->inputEncodingFlags |= TCL_ENCODING_END; + } return 1; } Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst, dstNeeded + TCL_UTF_MAX, &srcRead, &dstWrote, &numChars); + + if (encEndFlagSuppressed) { + statePtr->inputEncodingFlags |= TCL_ENCODING_END; + } + if (srcRead == 0) { /* * Not enough bytes in src buffer to make a complete char. Copy @@ -4837,6 +4895,23 @@ ReadChars(statePtr, objPtr, charsToRead, offsetPtr, factorPtr) } return -1; } + + /* Space is made at the beginning of the buffer to copy the + * previous unused bytes there. Check first if the buffer we + * are using actually has enough space at its beginning for + * the data we are copying. Because if not we will write over the + * buffer management information, especially the 'nextPtr'. + * + * Note that the BUFFER_PADDING (See AllocChannelBuffer) is + * used to prevent exactly this situation. I.e. it should + * never happen. Therefore it is ok to panic should it happen + * despite the precautions. + */ + + if (nextPtr->nextRemoved - srcLen < 0) { + Tcl_Panic ("Buffer Underflow, BUFFER_PADDING not enough"); + } + nextPtr->nextRemoved -= srcLen; memcpy((VOID *) (nextPtr->buf + nextPtr->nextRemoved), (VOID *) src, (size_t) srcLen); -- cgit v0.12