diff options
-rw-r--r-- | ChangeLog | 22 | ||||
-rw-r--r-- | generic/tclEncoding.c | 14 | ||||
-rw-r--r-- | generic/tclIO.c | 79 |
3 files changed, 111 insertions, 4 deletions
@@ -1,3 +1,25 @@ +2006-04-03 Andreas Kupries <andreask@activestate.com> + + * generic/tclIO.c (ReadChars): Added check and panic and + commentary to a piece of code which relies on BUFFER_PADDING to + create enough space at the beginning of each buffer forthe + insertion of partial multi-byte data at the beginning of a + buffer. To explain why this code is ok, and as precaution if + someone twiddled the BUFFER_PADDING into uselessness. + + * generic/tclIO.c (ReadChars): [SF Tcl Bug 1462248]. Added code + temporarily suppress the use of TCL_ENCODING_END set when eof + was reached while the buffer we are converting is not truly the + last buffer in the queue. together with the Utf bug below it was + possible to completely bollox the buffer data structures, + eventually crashing Tcl. + + * generic/tclEncoding.c (UtfToUtfProc): Fixed problem where the + function accessed memory beyond the end of the input + buffer. When TCL_ENCODING_END is set and the last bytes of the + buffer start a multi-byte sequence. This bug contributed to [SF + Tcl Bug 1462248]. + 2006-03-28 Jeff Hobbs <jeffh@ActiveState.com> * win/configure, win/tcl.m4: define MACHINE for gcc builds as well. diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 5fa799e..2a3698f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclEncoding.c,v 1.16.2.9 2006/03/13 20:57:09 dgp Exp $ + * RCS: @(#) $Id: tclEncoding.c,v 1.16.2.10 2006/04/05 00:06:02 andreas_kupries Exp $ */ #include "tclInt.h" @@ -2083,13 +2083,23 @@ UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ *dst++ = 0; src += 2; + } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { + /* Always check before using Tcl_UtfToUniChar. Not doing + * can so cause it run beyond the endof the buffer! If we + * * happen such an incomplete char its byts are made to * + * represent themselves. + */ + + ch = (Tcl_UniChar) *src; + src += 1; + dst += Tcl_UniCharToUtf(ch, dst); } else { src += Tcl_UtfToUniChar(src, &ch); dst += Tcl_UniCharToUtf(ch, dst); } } - *srcReadPtr = src - srcStart; + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; diff --git a/generic/tclIO.c b/generic/tclIO.c index c7c2e3f..f82f648 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclIO.c,v 1.61.2.19 2006/03/10 14:12:14 vasiljevic Exp $ + * RCS: @(#) $Id: tclIO.c,v 1.61.2.20 2006/04/05 00:06:03 andreas_kupries Exp $ */ #include "tclInt.h" @@ -4727,12 +4727,13 @@ ReadChars(statePtr, objPtr, charsToRead, offsetPtr, factorPtr) ChannelBuffer *bufPtr; char *src, *dst; Tcl_EncodingState oldState; + int encEndFlagSuppressed = 0; factor = *factorPtr; offset = *offsetPtr; bufPtr = statePtr->inQueueHead; - src = bufPtr->buf + bufPtr->nextRemoved; + src = bufPtr->buf + bufPtr->nextRemoved; srcLen = bufPtr->nextAdded - bufPtr->nextRemoved; toRead = charsToRead; @@ -4776,6 +4777,54 @@ ReadChars(statePtr, objPtr, charsToRead, offsetPtr, factorPtr) } dst = objPtr->bytes + offset; + /* + * SF Tcl Bug 1462248 + * The cause of the crash reported in the referenced bug is this: + * + * - ReadChars, called with a single buffer, with a incomplete + * multi-byte character at the end (only the first byte of it). + * - Encoding translation fails, asks for more data + * - Data is read, and eof is reached, TCL_ENCODING_END (TEE) is set. + * - ReadChar is called again, converts the first buffer, but due + * to TEE it does not check for incomplete multi-byte data, and the + * character just after the end of the first buffer is a valid + * completion of the multi-byte header in the actual buffer. The + * conversion reads more characters from the buffer then present. + * This causes nextRemoved to overshoot nextAdded and the next + * reads compute a negative srcLen, cause further translations to + * fail, causing copying of data into the next buffer using bad + * arguments, causing the mecpy for to eventually fail. + * + * In the end it is a memory access bug spiraling out of control + * if the conditions are _just so_. And ultimate cause is that TEE + * is given to a conversion where it should not. TEE signals that + * this is the last buffer. Except in our case it is not. + * + * My solution is to suppress TEE if the first buffer is not the + * last. We will eventually need it given that EOF has been + * reached, but not right now. This is what the new flag + * "endEncSuppressFlag" is for. + * + * The bug in 'Tcl_Utf2UtfProc' where it read from memory behind + * the actual buffer has been fixed as well, and fixes the problem + * with the crash too, but this would still allow the generic + * layer to accidentially break a multi-byte sequence if the + * conditions are just right, because again the ExternalToUtf + * would be successful where it should not. + */ + + if ((statePtr->inputEncodingFlags & TCL_ENCODING_END) && + (bufPtr->nextPtr != NULL)) { + + /* TEE is set for a buffer which is not the last. Squash it + * for now, and restore it later, before yielding control to + * our caller. + */ + + statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; + encEndFlagSuppressed = 1; + } + oldState = statePtr->inputEncodingState; if (statePtr->flags & INPUT_NEED_NL) { /* @@ -4801,12 +4850,21 @@ ReadChars(statePtr, objPtr, charsToRead, offsetPtr, factorPtr) } statePtr->inputEncodingFlags &= ~TCL_ENCODING_START; *offsetPtr += 1; + + if (encEndFlagSuppressed) { + statePtr->inputEncodingFlags |= TCL_ENCODING_END; + } return 1; } Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst, dstNeeded + TCL_UTF_MAX, &srcRead, &dstWrote, &numChars); + + if (encEndFlagSuppressed) { + statePtr->inputEncodingFlags |= TCL_ENCODING_END; + } + if (srcRead == 0) { /* * Not enough bytes in src buffer to make a complete char. Copy @@ -4837,6 +4895,23 @@ ReadChars(statePtr, objPtr, charsToRead, offsetPtr, factorPtr) } return -1; } + + /* Space is made at the beginning of the buffer to copy the + * previous unused bytes there. Check first if the buffer we + * are using actually has enough space at its beginning for + * the data we are copying. Because if not we will write over the + * buffer management information, especially the 'nextPtr'. + * + * Note that the BUFFER_PADDING (See AllocChannelBuffer) is + * used to prevent exactly this situation. I.e. it should + * never happen. Therefore it is ok to panic should it happen + * despite the precautions. + */ + + if (nextPtr->nextRemoved - srcLen < 0) { + Tcl_Panic ("Buffer Underflow, BUFFER_PADDING not enough"); + } + nextPtr->nextRemoved -= srcLen; memcpy((VOID *) (nextPtr->buf + nextPtr->nextRemoved), (VOID *) src, (size_t) srcLen); |