summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog22
-rw-r--r--generic/tclEncoding.c12
-rw-r--r--generic/tclIO.c77
3 files changed, 109 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 65f77be..db992a3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+2006-04-03 Andreas Kupries <andreask@activestate.com>
+
+ * generic/tclIO.c (ReadChars): Added check and panic and
+ commentary to a piece of code which relies on BUFFER_PADDING to
+ create enough space at the beginning of each buffer forthe
+ insertion of partial multi-byte data at the beginning of a
+ buffer. To explain why this code is ok, and as precaution if
+ someone twiddled the BUFFER_PADDING into uselessness.
+
+ * generic/tclIO.c (ReadChars): [SF Tcl Bug 1462248]. Added code
+ temporarily suppress the use of TCL_ENCODING_END set when eof
+ was reached while the buffer we are converting is not truly the
+ last buffer in the queue. together with the Utf bug below it was
+ possible to completely bollox the buffer data structures,
+ eventually crashing Tcl.
+
+ * generic/tclEncoding.c (UtfToUtfProc): Fixed problem where the
+ function accessed memory beyond the end of the input
+ buffer. When TCL_ENCODING_END is set and the last bytes of the
+ buffer start a multi-byte sequence. This bug contributed to [SF
+ Tcl Bug 1462248].
+
2006-03-30 Miguel Sofer <msofer@users.sf.net>
* generic/tclExecute.c: remove unused var and silence gcc warning
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 783e3d3..7abb402 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclEncoding.c,v 1.41 2006/03/24 21:11:45 dgp Exp $
+ * RCS: @(#) $Id: tclEncoding.c,v 1.42 2006/04/05 00:18:50 andreas_kupries Exp $
*/
#include "tclInt.h"
@@ -2225,6 +2225,16 @@ UtfToUtfProc(
*dst++ = 0;
src += 2;
+ } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
+ /* Always check before using Tcl_UtfToUniChar. Not doing
+ * can so cause it run beyond the endof the buffer! If we
+ * * happen such an incomplete char its byts are made to *
+ * represent themselves.
+ */
+
+ ch = (Tcl_UniChar) *src;
+ src += 1;
+ dst += Tcl_UniCharToUtf(ch, dst);
} else {
src += Tcl_UtfToUniChar(src, &ch);
dst += Tcl_UniCharToUtf(ch, dst);
diff --git a/generic/tclIO.c b/generic/tclIO.c
index dc1f48c..b9c3454 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclIO.c,v 1.106 2006/03/27 18:08:50 andreas_kupries Exp $
+ * RCS: @(#) $Id: tclIO.c,v 1.107 2006/04/05 00:18:50 andreas_kupries Exp $
*/
#include "tclInt.h"
@@ -4938,6 +4938,7 @@ ReadChars(
ChannelBuffer *bufPtr;
char *src, *dst;
Tcl_EncodingState oldState;
+ int encEndFlagSuppressed = 0;
factor = *factorPtr;
offset = *offsetPtr;
@@ -4986,6 +4987,54 @@ ReadChars(
}
dst = objPtr->bytes + offset;
+ /*
+ * SF Tcl Bug 1462248
+ * The cause of the crash reported in the referenced bug is this:
+ *
+ * - ReadChars, called with a single buffer, with a incomplete
+ * multi-byte character at the end (only the first byte of it).
+ * - Encoding translation fails, asks for more data
+ * - Data is read, and eof is reached, TCL_ENCODING_END (TEE) is set.
+ * - ReadChar is called again, converts the first buffer, but due
+ * to TEE it does not check for incomplete multi-byte data, and the
+ * character just after the end of the first buffer is a valid
+ * completion of the multi-byte header in the actual buffer. The
+ * conversion reads more characters from the buffer then present.
+ * This causes nextRemoved to overshoot nextAdded and the next
+ * reads compute a negative srcLen, cause further translations to
+ * fail, causing copying of data into the next buffer using bad
+ * arguments, causing the mecpy for to eventually fail.
+ *
+ * In the end it is a memory access bug spiraling out of control
+ * if the conditions are _just so_. And ultimate cause is that TEE
+ * is given to a conversion where it should not. TEE signals that
+ * this is the last buffer. Except in our case it is not.
+ *
+ * My solution is to suppress TEE if the first buffer is not the
+ * last. We will eventually need it given that EOF has been
+ * reached, but not right now. This is what the new flag
+ * "endEncSuppressFlag" is for.
+ *
+ * The bug in 'Tcl_Utf2UtfProc' where it read from memory behind
+ * the actual buffer has been fixed as well, and fixes the problem
+ * with the crash too, but this would still allow the generic
+ * layer to accidentially break a multi-byte sequence if the
+ * conditions are just right, because again the ExternalToUtf
+ * would be successful where it should not.
+ */
+
+ if ((statePtr->inputEncodingFlags & TCL_ENCODING_END) &&
+ (bufPtr->nextPtr != NULL)) {
+
+ /* TEE is set for a buffer which is not the last. Squash it
+ * for now, and restore it later, before yielding control to
+ * our caller.
+ */
+
+ statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
+ encEndFlagSuppressed = 1;
+ }
+
oldState = statePtr->inputEncodingState;
if (statePtr->flags & INPUT_NEED_NL) {
/*
@@ -5011,12 +5060,21 @@ ReadChars(
}
statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;
*offsetPtr += 1;
+
+ if (encEndFlagSuppressed) {
+ statePtr->inputEncodingFlags |= TCL_ENCODING_END;
+ }
return 1;
}
Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen,
statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst,
dstNeeded + TCL_UTF_MAX, &srcRead, &dstWrote, &numChars);
+
+ if (encEndFlagSuppressed) {
+ statePtr->inputEncodingFlags |= TCL_ENCODING_END;
+ }
+
if (srcRead == 0) {
/*
* Not enough bytes in src buffer to make a complete char. Copy the
@@ -5046,6 +5104,23 @@ ReadChars(
}
return -1;
}
+
+ /* Space is made at the beginning of the buffer to copy the
+ * previous unused bytes there. Check first if the buffer we
+ * are using actually has enough space at its beginning for
+ * the data we are copying. Because if not we will write over the
+ * buffer management information, especially the 'nextPtr'.
+ *
+ * Note that the BUFFER_PADDING (See AllocChannelBuffer) is
+ * used to prevent exactly this situation. I.e. it should
+ * never happen. Therefore it is ok to panic should it happen
+ * despite the precautions.
+ */
+
+ if (nextPtr->nextRemoved - srcLen < 0) {
+ Tcl_Panic ("Buffer Underflow, BUFFER_PADDING not enough");
+ }
+
nextPtr->nextRemoved -= srcLen;
memcpy((VOID *) (nextPtr->buf + nextPtr->nextRemoved), (VOID *) src,
(size_t) srcLen);