summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2014-05-08 17:30:32 (GMT)
committerdgp <dgp@users.sourceforge.net>2014-05-08 17:30:32 (GMT)
commit8c081b3c39faeaf973eb300f8b269e403fa6107a (patch)
tree0d6ac9ee1d752c0e554a76646a2f8bd802263486
parent48ae7e42c1bd6a104c78d737fd6b826a1a4ee7bd (diff)
parenteb476fdf9350b70cf8ab1ec90ad848dec9d1b75b (diff)
downloadtcl-8c081b3c39faeaf973eb300f8b269e403fa6107a.zip
tcl-8c081b3c39faeaf973eb300f8b269e403fa6107a.tar.gz
tcl-8c081b3c39faeaf973eb300f8b269e403fa6107a.tar.bz2
Merge the reforms of dgp-read-bytes branch into 8.5+ releases.
Large overhaul of I/O read operations - Protects integer overflow of buffers, reusing append machinery - Forces -buffersize changes to take place when commanded - Uses assertions to simplify code in "can't happen" situations - Eliminated duplication of -translation processing - Fixes bugs io-35.18b and io-35.20
-rw-r--r--generic/tclBinary.c92
-rw-r--r--generic/tclIO.c1383
-rw-r--r--generic/tclIO.h26
-rw-r--r--generic/tclInt.h4
-rw-r--r--generic/tclStringObj.c61
-rw-r--r--tests/io.test158
6 files changed, 861 insertions, 863 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index dbb296b..68289f2 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -549,6 +549,98 @@ UpdateStringOfByteArray(
/*
*----------------------------------------------------------------------
*
+ * TclAppendBytesToByteArray --
+ *
+ * This function appends an array of bytes to a byte array object. Note
+ * that the object *must* be unshared, and the array of bytes *must not*
+ * refer to the object being appended to.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Allocates enough memory for an array of bytes of the requested total
+ * size, or possibly larger. [Bug 2992970]
+ *
+ *----------------------------------------------------------------------
+ */
+
+#define TCL_MIN_GROWTH 1024
+void
+TclAppendBytesToByteArray(
+ Tcl_Obj *objPtr,
+ const unsigned char *bytes,
+ int len)
+{
+ ByteArray *byteArrayPtr;
+ int needed;
+
+ if (Tcl_IsShared(objPtr)) {
+ Tcl_Panic("%s called with shared object","TclAppendBytesToByteArray");
+ }
+ if (len < 0) {
+ Tcl_Panic("%s must be called with definite number of bytes to append",
+ "TclAppendBytesToByteArray");
+ }
+ if (len == 0) {
+ /* Append zero bytes is a no-op. */
+ return;
+ }
+ if (objPtr->typePtr != &tclByteArrayType) {
+ SetByteArrayFromAny(NULL, objPtr);
+ }
+ byteArrayPtr = GET_BYTEARRAY(objPtr);
+
+ if (len > INT_MAX - byteArrayPtr->used) {
+ Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
+ }
+
+ needed = byteArrayPtr->used + len;
+ /*
+ * If we need to, resize the allocated space in the byte array.
+ */
+
+ if (needed > byteArrayPtr->allocated) {
+ ByteArray *ptr = NULL;
+ int attempt;
+
+ if (needed <= INT_MAX/2) {
+ /* Try to allocate double the total space that is needed. */
+ attempt = 2 * needed;
+ ptr = (ByteArray *) attemptckrealloc((void *) byteArrayPtr,
+ BYTEARRAY_SIZE(attempt));
+ }
+ if (ptr == NULL) {
+ /* Try to allocate double the increment that is needed (plus). */
+ unsigned int limit = INT_MAX - needed;
+ unsigned int extra = len + TCL_MIN_GROWTH;
+ int growth = (int) ((extra > limit) ? limit : extra);
+
+ attempt = needed + growth;
+ ptr = (ByteArray *) attemptckrealloc((void *) byteArrayPtr,
+ BYTEARRAY_SIZE(attempt));
+ }
+ if (ptr == NULL) {
+ /* Last chance: Try to allocate exactly what is needed. */
+ attempt = needed;
+ ptr = (ByteArray *) ckrealloc((void *)byteArrayPtr,
+ BYTEARRAY_SIZE(attempt));
+ }
+ byteArrayPtr = ptr;
+ byteArrayPtr->allocated = attempt;
+ SET_BYTEARRAY(objPtr, byteArrayPtr);
+ }
+
+ if (bytes) {
+ memcpy(byteArrayPtr->bytes + byteArrayPtr->used, bytes, len);
+ }
+ byteArrayPtr->used += len;
+ TclInvalidateStringRep(objPtr);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_BinaryObjCmd --
*
* This procedure implements the "binary" Tcl command.
diff --git a/generic/tclIO.c b/generic/tclIO.c
index 4628e90..8ca01ca 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -6,6 +6,7 @@
*
* Copyright (c) 1998-2000 Ajuba Solutions
* Copyright (c) 1995-1997 Sun Microsystems, Inc.
+ * Contributions from Don Porter, NIST, 2014. (not subject to US copyright)
*
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
@@ -176,8 +177,6 @@ static void CleanupChannelHandlers(Tcl_Interp *interp,
static int CloseChannel(Tcl_Interp *interp, Channel *chanPtr,
int errorCode);
static void CommonGetsCleanup(Channel *chanPtr);
-static int CopyAndTranslateBuffer(ChannelState *statePtr,
- char *result, int space);
static int CopyBuffer(Channel *chanPtr, char *result, int space);
static int CopyData(CopyState *csPtr, int mask);
static void CopyEventProc(ClientData clientData, int mask);
@@ -191,7 +190,7 @@ static int DetachChannel(Tcl_Interp *interp, Tcl_Channel chan);
static void DiscardInputQueued(ChannelState *statePtr,
int discardSavedBuffers);
static void DiscardOutputQueued(ChannelState *chanPtr);
-static int DoRead(Channel *chanPtr, char *srcPtr, int slen);
+static int DoRead(Channel *chanPtr, char *dst, int bytesToRead);
static int DoReadChars(Channel *chan, Tcl_Obj *objPtr, int toRead,
int appendFlag);
static int FilterInputBytes(Channel *chanPtr,
@@ -208,16 +207,16 @@ static int HaveVersion(const Tcl_ChannelType *typePtr,
static void PeekAhead(Channel *chanPtr, char **dstEndPtr,
GetsState *gsPtr);
static int ReadBytes(ChannelState *statePtr, Tcl_Obj *objPtr,
- int charsLeft, int *offsetPtr);
+ int charsLeft);
static int ReadChars(ChannelState *statePtr, Tcl_Obj *objPtr,
- int charsLeft, int *offsetPtr, int *factorPtr);
+ int charsLeft, int *factorPtr);
static void RecycleBuffer(ChannelState *statePtr,
ChannelBuffer *bufPtr, int mustDiscard);
static int StackSetBlockMode(Channel *chanPtr, int mode);
static int SetBlockMode(Tcl_Interp *interp, Channel *chanPtr,
int mode);
static void StopCopy(CopyState *csPtr);
-static int TranslateInputEOL(ChannelState *statePtr, char *dst,
+static void TranslateInputEOL(ChannelState *statePtr, char *dst,
const char *src, int *dstLenPtr, int *srcLenPtr);
static void UpdateInterest(Channel *chanPtr);
static int Write(Channel *chanPtr, const char *src,
@@ -1316,7 +1315,7 @@ Tcl_GetChannel(
chanPtr = Tcl_GetHashValue(hPtr);
chanPtr = chanPtr->state->bottomChanPtr;
if (modePtr != NULL) {
- *modePtr = chanPtr->state->flags & (TCL_READABLE|TCL_WRITABLE);
+ *modePtr = GotFlag(chanPtr->state, TCL_READABLE|TCL_WRITABLE);
}
return (Tcl_Channel) chanPtr;
@@ -1364,7 +1363,7 @@ TclGetChannelFromObj(
*channelPtr = (Tcl_Channel) (statePtr->bottomChanPtr);
if (modePtr != NULL) {
- *modePtr = statePtr->flags & (TCL_READABLE|TCL_WRITABLE);
+ *modePtr = GotFlag(statePtr, TCL_READABLE|TCL_WRITABLE);
}
return TCL_OK;
@@ -1627,7 +1626,7 @@ Tcl_StackChannel(
* --+---+---+---+----+
*/
- if ((mask & (statePtr->flags & (TCL_READABLE | TCL_WRITABLE))) == 0) {
+ if ((mask & (GotFlag(statePtr, TCL_READABLE | TCL_WRITABLE))) == 0) {
if (interp) {
Tcl_AppendResult(interp,
"reading and writing both disallowed for channel \"",
@@ -1684,17 +1683,17 @@ Tcl_StackChannel(
*/
if (((mask & TCL_READABLE) != 0) && (statePtr->inQueueHead != NULL)) {
+
/*
- * Remark: It is possible that the channel buffers contain data from
- * some earlier push-backs.
+ * When statePtr->inQueueHead is not NULL, we know
+ * prevChanPtr->inQueueHead must be NULL.
*/
- statePtr->inQueueTail->nextPtr = prevChanPtr->inQueueHead;
- prevChanPtr->inQueueHead = statePtr->inQueueHead;
+ assert(prevChanPtr->inQueueHead == NULL);
+ assert(prevChanPtr->inQueueTail == NULL);
- if (prevChanPtr->inQueueTail == NULL) {
- prevChanPtr->inQueueTail = statePtr->inQueueTail;
- }
+ prevChanPtr->inQueueHead = statePtr->inQueueHead;
+ prevChanPtr->inQueueTail = statePtr->inQueueTail;
statePtr->inQueueHead = NULL;
statePtr->inQueueTail = NULL;
@@ -2111,7 +2110,7 @@ Tcl_GetChannelMode(
ChannelState *statePtr = ((Channel *) chan)->state;
/* State of actual channel. */
- return (statePtr->flags & (TCL_READABLE | TCL_WRITABLE));
+ return GotFlag(statePtr, TCL_READABLE | TCL_WRITABLE);
}
/*
@@ -2289,12 +2288,12 @@ RecycleBuffer(
}
/*
- * Only save buffers which are at least as big as the requested buffersize
- * for the channel. This is to honor dynamic changes of the buffersize
+ * Only save buffers which have the requested buffersize for the
+ * channel. This is to honor dynamic changes of the buffersize
* made by the user.
*/
- if ((bufPtr->bufLength - BUFFER_PADDING) < statePtr->bufSize) {
+ if ((bufPtr->bufLength - BUFFER_PADDING) != statePtr->bufSize) {
ReleaseChannelBuffer(bufPtr);
return;
}
@@ -3754,9 +3753,7 @@ Write(
&statePtr->outputEncodingState, dst,
dstLen + BUFFER_PADDING, &srcRead, &dstWrote, NULL);
- if (srcRead != nlLen) {
- Tcl_Panic("Can This Happen?");
- }
+ assert (srcRead == nlLen);
bufPtr->nextAdded += dstWrote;
src++;
@@ -4559,7 +4556,7 @@ FilterInputBytes(
ChannelState *statePtr = chanPtr->state;
/* State info for channel */
ChannelBuffer *bufPtr;
- char *raw, *rawStart, *dst;
+ char *raw, *dst;
int offset, toRead, dstNeeded, spaceLeft, result, rawLen;
Tcl_Obj *objPtr;
#define ENCODING_LINESIZE 20 /* Lower bound on how many bytes to convert at
@@ -4621,8 +4618,7 @@ FilterInputBytes(
* string rep if we need more space.
*/
- rawStart = RemovePoint(bufPtr);
- raw = rawStart;
+ raw = RemovePoint(bufPtr);
rawLen = BytesLeft(bufPtr);
dst = *gsPtr->dstPtr;
@@ -4975,42 +4971,18 @@ Tcl_ReadRaw(
ResetFlag(statePtr, CHANNEL_BLOCKED);
}
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
/*
- * [Bug 943274]. Better emulation of non-blocking channels for
- * channels without BlockModeProc, by keeping track of true
- * fileevents generated by the OS == Data waiting and reading if
- * and only if we are sure to have data.
+ * Now go to the driver to get as much as is possible to
+ * fill the remaining request. Do all the error handling by
+ * ourselves. The code was stolen from 'GetInput' and
+ * slightly adapted (different return value here).
+ *
+ * The case of 'bytesToRead == 0' at this point cannot
+ * happen.
*/
- if (GotFlag(statePtr, CHANNEL_NONBLOCKING) &&
- (Tcl_ChannelBlockModeProc(chanPtr->typePtr) == NULL) &&
- !GotFlag(statePtr, CHANNEL_HAS_MORE_DATA)) {
- /*
- * We bypass the driver; it would block as no data is
- * available.
- */
-
- nread = -1;
- result = EWOULDBLOCK;
- } else {
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
-
- /*
- * Now go to the driver to get as much as is possible to fill
- * the remaining request. Do all the error handling by
- * ourselves. The code was stolen from 'GetInput' and slightly
- * adapted (different return value here).
- *
- * The case of 'bytesToRead == 0' at this point cannot happen.
- */
-
- nread = ChanRead(chanPtr, bufPtr + copied,
- bytesToRead - copied, &result);
-
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
- }
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
+ nread = ChanRead(chanPtr, bufPtr + copied,
+ bytesToRead - copied, &result);
if (nread > 0) {
/*
@@ -5023,18 +4995,6 @@ Tcl_ReadRaw(
if (nread < (bytesToRead - copied)) {
SetFlag(statePtr, CHANNEL_BLOCKED);
}
-
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
- if (nread <= (bytesToRead - copied)) {
- /*
- * [Bug 943274] We have read the available data, clear
- * flag.
- */
-
- ResetFlag(statePtr, CHANNEL_HAS_MORE_DATA);
- }
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
-
} else if (nread == 0) {
SetFlag(statePtr, CHANNEL_EOF);
statePtr->inputEncodingFlags |= TCL_ENCODING_END;
@@ -5162,8 +5122,9 @@ DoReadChars(
ChannelState *statePtr = chanPtr->state;
/* State info for channel */
ChannelBuffer *bufPtr;
- int offset, factor, copied, copiedNow, result;
+ int factor, copied, copiedNow, result;
Tcl_Encoding encoding;
+ int binaryMode;
#define UTF_EXPANSION_FACTOR 1024
/*
@@ -5175,8 +5136,12 @@ DoReadChars(
factor = UTF_EXPANSION_FACTOR;
Tcl_Preserve(chanPtr);
+ binaryMode = (encoding == NULL)
+ && (statePtr->inputTranslation == TCL_TRANSLATE_LF)
+ && (statePtr->inEofChar == '\0');
+
if (appendFlag == 0) {
- if (encoding == NULL) {
+ if (binaryMode) {
Tcl_SetByteArrayLength(objPtr, 0);
} else {
Tcl_SetObjLength(objPtr, 0);
@@ -5185,27 +5150,21 @@ DoReadChars(
* We're going to access objPtr->bytes directly, so we must ensure
* that this is actually a string object (otherwise it might have
* been pure Unicode).
+ *
+ * Probably not needed anymore.
*/
TclGetString(objPtr);
}
- offset = 0;
- } else {
- if (encoding == NULL) {
- Tcl_GetByteArrayFromObj(objPtr, &offset);
- } else {
- TclGetStringFromObj(objPtr, &offset);
- }
}
for (copied = 0; (unsigned) toRead > 0; ) {
copiedNow = -1;
if (statePtr->inQueueHead != NULL) {
- if (encoding == NULL) {
- copiedNow = ReadBytes(statePtr, objPtr, toRead, &offset);
+ if (binaryMode) {
+ copiedNow = ReadBytes(statePtr, objPtr, toRead);
} else {
- copiedNow = ReadChars(statePtr, objPtr, toRead, &offset,
- &factor);
+ copiedNow = ReadChars(statePtr, objPtr, toRead, &factor);
}
/*
@@ -5255,11 +5214,6 @@ DoReadChars(
}
ResetFlag(statePtr, CHANNEL_BLOCKED);
- if (encoding == NULL) {
- Tcl_SetByteArrayLength(objPtr, offset);
- } else {
- Tcl_SetObjLength(objPtr, offset);
- }
/*
* Update the notifier state so we don't block while there is still data
@@ -5295,13 +5249,11 @@ DoReadChars(
* allocated to hold data read from the channel as needed.
*
* Results:
- * The return value is the number of bytes appended to the object and
- * *offsetPtr is filled with the total number of bytes in the object
- * (greater than the return value if there were already bytes in the
- * object).
+ * The return value is the number of bytes appended to the object, or
+ * -1 to indicate that zero bytes were read due to an EOF.
*
* Side effects:
- * None.
+ * The storage of bytes in objPtr can cause (re-)allocation of memory.
*
*---------------------------------------------------------------------------
*/
@@ -5314,72 +5266,22 @@ ReadBytes(
* been allocated to hold data, not how many
* bytes of data have been stored in the
* object. */
- int bytesToRead, /* Maximum number of bytes to store, or < 0 to
+ int bytesToRead) /* Maximum number of bytes to store, or < 0 to
* get all available bytes. Bytes are obtained
* from the first buffer in the queue - even
* if this number is larger than the number of
* bytes available in the first buffer, only
* the bytes from the first buffer are
* returned. */
- int *offsetPtr) /* On input, contains how many bytes of objPtr
- * have been used to hold data. On output,
- * filled with how many bytes are now being
- * used. */
{
- int toRead, srcLen, offset, length, srcRead, dstWrote;
- ChannelBuffer *bufPtr;
- char *src, *dst;
-
- offset = *offsetPtr;
-
- bufPtr = statePtr->inQueueHead;
- src = RemovePoint(bufPtr);
- srcLen = BytesLeft(bufPtr);
-
- toRead = bytesToRead;
- if ((unsigned) toRead > (unsigned) srcLen) {
- toRead = srcLen;
- }
-
- dst = (char *) Tcl_GetByteArrayFromObj(objPtr, &length);
- if (toRead > length - offset - 1) {
- /*
- * Double the existing size of the object or make enough room to hold
- * all the characters we may get from the source buffer, whichever is
- * larger.
- */
-
- length = offset * 2;
- if (offset < toRead) {
- length = offset + toRead + 1;
- }
- dst = (char *) Tcl_SetByteArrayLength(objPtr, length);
- }
- dst += offset;
+ ChannelBuffer *bufPtr = statePtr->inQueueHead;
+ int srcLen = BytesLeft(bufPtr);
+ int toRead = bytesToRead>srcLen || bytesToRead<0 ? srcLen : bytesToRead;
- if (GotFlag(statePtr, INPUT_NEED_NL)) {
- ResetFlag(statePtr, INPUT_NEED_NL);
- if ((srcLen == 0) || (*src != '\n')) {
- *dst = '\r';
- *offsetPtr += 1;
- return 1;
- }
- *dst++ = '\n';
- src++;
- srcLen--;
- toRead--;
- }
-
- srcRead = srcLen;
- dstWrote = toRead;
- if (TranslateInputEOL(statePtr, dst, src, &dstWrote, &srcRead) != 0) {
- if (dstWrote == 0) {
- return -1;
- }
- }
- bufPtr->nextRemoved += srcRead;
- *offsetPtr += dstWrote;
- return dstWrote;
+ TclAppendBytesToByteArray(objPtr, (unsigned char *) RemovePoint(bufPtr),
+ toRead);
+ bufPtr->nextRemoved += toRead;
+ return toRead;
}
/*
@@ -5423,263 +5325,316 @@ ReadChars(
* is larger than the number of characters
* available in the first buffer, only the
* characters from the first buffer are
- * returned. */
- int *offsetPtr, /* On input, contains how many bytes of objPtr
- * have been used to hold data. On output,
- * filled with how many bytes are now being
- * used. */
+ * returned. The execption is when there is
+ * not any complete character in the first
+ * buffer. In that case, a recursive call
+ * effectively obtains chars from the
+ * second buffer. */
int *factorPtr) /* On input, contains a guess of how many
* bytes need to be allocated to hold the
* result of converting N source bytes to
* UTF-8. On output, contains another guess
* based on the data seen so far. */
{
- int toRead, factor, offset, spaceLeft, srcLen, dstNeeded;
- int srcRead, dstWrote, numChars, dstRead;
- ChannelBuffer *bufPtr;
- char *src, *dst;
- Tcl_EncodingState oldState;
- int encEndFlagSuppressed = 0;
-
- factor = *factorPtr;
- offset = *offsetPtr;
+ Tcl_Encoding encoding = statePtr->encoding? statePtr->encoding
+ : GetBinaryEncoding();
+ Tcl_EncodingState savedState = statePtr->inputEncodingState;
+ ChannelBuffer *bufPtr = statePtr->inQueueHead;
+ int savedIEFlags = statePtr->inputEncodingFlags;
+ int savedFlags = statePtr->flags;
+ char *dst, *src = RemovePoint(bufPtr);
+ int dstLimit, numBytes, srcLen = BytesLeft(bufPtr);
- bufPtr = statePtr->inQueueHead;
- src = RemovePoint(bufPtr);
- srcLen = BytesLeft(bufPtr);
+ /*
+ * One src byte can yield at most one character. So when the
+ * number of src bytes we plan to read is less than the limit on
+ * character count to be read, clearly we will remain within that
+ * limit, and we can use the value of "srcLen" as a tighter limit
+ * for sizing receiving buffers.
+ */
- toRead = charsToRead;
- if ((unsigned)toRead > (unsigned)srcLen) {
- toRead = srcLen;
- }
+ int toRead = ((unsigned) charsToRead > srcLen) ? srcLen : charsToRead;
/*
* 'factor' is how much we guess that the bytes in the source buffer will
* expand when converted to UTF-8 chars. This guess comes from analyzing
* how many characters were produced by the previous pass.
*/
+
+ int factor = *factorPtr;
+ int dstNeeded = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR;
- dstNeeded = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR;
- spaceLeft = objPtr->length - offset;
-
- if (dstNeeded > spaceLeft) {
- /*
- * Double the existing size of the object or make enough room to hold
- * all the characters we want from the source buffer, whichever is
- * larger.
- */
-
- int length = offset + ((offset < dstNeeded) ? dstNeeded : offset);
-
- if (Tcl_AttemptSetObjLength(objPtr, length) == 0) {
- length = offset + dstNeeded;
- if (Tcl_AttemptSetObjLength(objPtr, length) == 0) {
- dstNeeded = TCL_UTF_MAX - 1 + toRead;
- length = offset + dstNeeded;
- Tcl_SetObjLength(objPtr, length);
- }
- }
- spaceLeft = length - offset;
- }
+ (void) TclGetStringFromObj(objPtr, &numBytes);
+ Tcl_AppendToObj(objPtr, NULL, dstNeeded);
if (toRead == srcLen) {
- /*
- * Want to convert the whole buffer in one pass. If we have enough
- * space, convert it using all available space in object rather than
- * using the factor.
- */
-
- dstNeeded = spaceLeft;
+ unsigned int size;
+ dst = TclGetStringStorage(objPtr, &size) + numBytes;
+ dstNeeded = size - numBytes;
+ } else {
+ dst = TclGetString(objPtr) + numBytes;
}
- dst = objPtr->bytes + offset;
/*
- * [Bug 1462248]: The cause of the crash reported in this bug is this:
+ * This routine is burdened with satisfying several constraints.
+ * It cannot append more than 'charsToRead` chars onto objPtr.
+ * This is measured after encoding and translation transformations
+ * are completed. There is no precise number of src bytes that can
+ * be associated with the limit. Yet, when we are done, we must know
+ * precisely the number of src bytes that were consumed to produce
+ * the appended chars, so that all subsequent bytes are left in
+ * the buffers for future read operations.
*
- * - ReadChars, called with a single buffer, with a incomplete
- * multi-byte character at the end (only the first byte of it).
- * - Encoding translation fails, asks for more data
- * - Data is read, and eof is reached, TCL_ENCODING_END (TEE) is set.
- * - ReadChar is called again, converts the first buffer, but due to TEE
- * it does not check for incomplete multi-byte data, and the character
- * just after the end of the first buffer is a valid completion of the
- * multi-byte header in the actual buffer. The conversion reads more
- * characters from the buffer then present. This causes nextRemoved to
- * overshoot nextAdded and the next reads compute a negative srcLen,
- * cause further translations to fail, causing copying of data into the
- * next buffer using bad arguments, causing the mecpy for to eventually
- * fail.
- *
- * In the end it is a memory access bug spiraling out of control if the
- * conditions are _just so_. And ultimate cause is that TEE is given to a
- * conversion where it should not. TEE signals that this is the last
- * buffer. Except in our case it is not.
- *
- * My solution is to suppress TEE if the first buffer is not the last. We
- * will eventually need it given that EOF has been reached, but not right
- * now. This is what the new flag "endEncSuppressFlag" is for.
- *
- * The bug in 'Tcl_Utf2UtfProc' where it read from memory behind the
- * actual buffer has been fixed as well, and fixes the problem with the
- * crash too, but this would still allow the generic layer to
- * accidentially break a multi-byte sequence if the conditions are just
- * right, because again the ExternalToUtf would be successful where it
- * should not.
+ * The consequence is that we have no choice but to implement a
+ * "trial and error" approach, where in general we may need to
+ * perform transformations and copies multiple times to achieve
+ * a consistent set of results. This takes the shape of a loop.
*/
- if ((statePtr->inputEncodingFlags & TCL_ENCODING_END) &&
- (bufPtr->nextPtr != NULL)) {
+ dstLimit = dstNeeded + 1;
+ while (1) {
+ int dstDecoded, dstRead, dstWrote, srcRead, numChars;
+
/*
- * TEE is set for a buffer which is not the last. Squash it for now,
- * and restore it later, before yielding control to our caller.
+ * Perform the encoding transformation. Read no more than
+ * srcLen bytes, write no more than dstLimit bytes.
*/
- statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
- encEndFlagSuppressed = 1;
- }
+ int code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
+ statePtr->inputEncodingFlags & (bufPtr->nextPtr
+ ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState,
+ dst, dstLimit, &srcRead, &dstDecoded, &numChars);
- oldState = statePtr->inputEncodingState;
- if (GotFlag(statePtr, INPUT_NEED_NL)) {
/*
- * We want a '\n' because the last character we saw was '\r'.
+ * Perform the translation transformation in place. Read no more
+ * than the dstDecoded bytes the encoding transformation actually
+ * produced. Capture the number of bytes written in dstWrote.
+ * Capture the number of bytes actually consumed in dstRead.
*/
- ResetFlag(statePtr, INPUT_NEED_NL);
- Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen,
- statePtr->inputEncodingFlags, &statePtr->inputEncodingState,
- dst, TCL_UTF_MAX + 1, &srcRead, &dstWrote, &numChars);
- if ((dstWrote > 0) && (*dst == '\n')) {
+ dstWrote = dstLimit;
+ dstRead = dstDecoded;
+ TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead);
+
+ if (dstRead < dstDecoded) {
+
/*
- * The next char was a '\n'. Consume it and produce a '\n'.
+ * The encoding transformation produced bytes that the
+ * translation transformation did not consume. Why did
+ * this happen?
*/
- bufPtr->nextRemoved += srcRead;
- } else {
+ if (statePtr->inEofChar && dst[dstRead] == statePtr->inEofChar) {
+ /*
+ * 1) There's an eof char set on the channel, and
+ * we saw it and stopped translating at that point.
+ *
+ * NOTE the bizarre spec of TranslateInputEOL in this case.
+ * Clearly the eof char had to be read in order to account
+ * for the stopping, but the value of dstRead does not
+ * include it.
+ *
+ * Also rather bizarre, our caller can only notice an
+ * EOF condition if we return the value -1 as the number
+ * of chars read. This forces us to perform a 2-call
+ * dance where the first call can read all the chars
+ * up to the eof char, and the second call is solely
+ * for consuming the encoded eof char then pointed at
+ * by src so that we can return that magic -1 value.
+ * This seems really wasteful, especially since
+ * the first decoding pass of each call is likely to
+ * decode many bytes beyond that eof char that's all we
+ * care about.
+ */
+
+ if (dstRead == 0) {
+ /*
+ * Curious choice in the eof char handling. We leave
+ * the eof char in the buffer. So, no need to compute
+ * a proper srcRead value. At this point, there
+ * are no chars before the eof char in the buffer.
+ */
+ Tcl_SetObjLength(objPtr, numBytes);
+ return -1;
+ }
+
+ {
+ /*
+ * There are chars leading the buffer before the eof
+ * char. Adjust the dstLimit so we go back and read
+ * only those and do not encounter the eof char this
+ * time.
+ */
+
+ dstLimit = dstRead + TCL_UTF_MAX;
+ statePtr->flags = savedFlags;
+ statePtr->inputEncodingFlags = savedIEFlags;
+ statePtr->inputEncodingState = savedState;
+ continue;
+ }
+ }
+
/*
- * The next char was not a '\n'. Produce a '\r'.
+ * 2) The other way to read fewer bytes than are decoded
+ * is when the final byte is \r and we're in a CRLF
+ * translation mode so we cannot decide whether to
+ * record \r or \n yet.
*/
- *dst = '\r';
- }
- statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;
- *offsetPtr += 1;
+ assert(dstRead + 1 == dstDecoded);
+ assert(dst[dstRead] == '\r');
+ assert(statePtr->inputTranslation == TCL_TRANSLATE_CRLF);
- if (encEndFlagSuppressed) {
- statePtr->inputEncodingFlags |= TCL_ENCODING_END;
- }
- return 1;
- }
+ if (dstWrote > 0) {
+ /*
+ * There are chars we can read before we hit the bare cr.
+ * Go back with a smaller dstLimit so we get them in the
+ * next pass, compute a matching srcRead, and don't end
+ * up back here in this call.
+ */
- Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen,
- statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst,
- dstNeeded + 1, &srcRead, &dstWrote, &numChars);
+ dstLimit = dstRead + TCL_UTF_MAX;
+ statePtr->flags = savedFlags;
+ statePtr->inputEncodingFlags = savedIEFlags;
+ statePtr->inputEncodingState = savedState;
+ continue;
+ }
- if (encEndFlagSuppressed) {
- statePtr->inputEncodingFlags |= TCL_ENCODING_END;
- }
+ assert(dstWrote == 0);
+ assert(dstRead == 0);
+ assert(dstDecoded == 1);
- if (srcRead == 0) {
- /*
- * Not enough bytes in src buffer to make a complete char. Copy the
- * bytes to the next buffer to make a new contiguous string, then tell
- * the caller to fill the buffer with more bytes.
- */
+ /*
+ * We decoded only the bare cr, and we cannot read a
+ * translated char from that alone. We have to know what's
+ * next. So why do we only have the one decoded char?
+ */
- ChannelBuffer *nextPtr;
+ if (code != TCL_OK) {
+ char buffer[TCL_UTF_MAX + 2];
+ int read, decoded, count;
+
+ /*
+ * Didn't get everything the buffer could offer
+ */
+
+ statePtr->flags = savedFlags;
+ statePtr->inputEncodingFlags = savedIEFlags;
+ statePtr->inputEncodingState = savedState;
+
+ Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
+ statePtr->inputEncodingFlags & (bufPtr->nextPtr
+ ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState,
+ buffer, TCL_UTF_MAX + 2, &read, &decoded, &count);
+
+ if (count == 2) {
+ if (buffer[1] == '\n') {
+ /* \r\n translate to \n */
+ dst[0] = '\n';
+ bufPtr->nextRemoved += read;
+ } else {
+ dst[0] = '\r';
+ bufPtr->nextRemoved += srcRead;
+ }
+
+ dst[1] = '\0';
+ statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;
+
+ Tcl_SetObjLength(objPtr, numBytes + 1);
+ return 1;
+ }
+
+ } else if (GotFlag(statePtr, CHANNEL_EOF)) {
- nextPtr = bufPtr->nextPtr;
- if (nextPtr == NULL) {
- if (srcLen > 0) {
/*
- * There isn't enough data in the buffers to complete the next
- * character, so we need to wait for more data before the next
- * file event can be delivered. [Bug 478856]
- *
- * The exception to this is if the input buffer was completely
- * empty before we tried to convert its contents. Nothing in,
- * nothing out, and no incomplete character data. The
- * conversion before the current one was complete.
+ * The bare \r is the only char and we will never read
+ * a subsequent char to make the determination.
*/
- SetFlag(statePtr, CHANNEL_NEED_MORE_DATA);
+ dst[0] = '\r';
+ bufPtr->nextRemoved = bufPtr->nextAdded;
+ Tcl_SetObjLength(objPtr, numBytes + 1);
+ return 1;
}
- return -1;
+
+ /* FALL THROUGH - get more data (dstWrote == 0) */
}
- /*
- * Space is made at the beginning of the buffer to copy the previous
- * unused bytes there. Check first if the buffer we are using actually
- * has enough space at its beginning for the data we are copying.
- * Because if not we will write over the buffer management
- * information, especially the 'nextPtr'.
- *
- * Note that the BUFFER_PADDING (See AllocChannelBuffer) is used to
- * prevent exactly this situation. I.e. it should never happen.
- * Therefore it is ok to panic should it happen despite the
- * precautions.
+ /*
+ * The translation transformation can only reduce the number
+ * of chars when it converts \r\n into \n. The reduction in
+ * the number of chars is the difference in bytes read and written.
*/
- if (nextPtr->nextRemoved - srcLen < 0) {
- Tcl_Panic("Buffer Underflow, BUFFER_PADDING not enough");
- }
+ numChars -= (dstRead - dstWrote);
- nextPtr->nextRemoved -= srcLen;
- memcpy(RemovePoint(nextPtr), src, (size_t) srcLen);
- RecycleBuffer(statePtr, bufPtr, 0);
- statePtr->inQueueHead = nextPtr;
- return ReadChars(statePtr, objPtr, charsToRead, offsetPtr, factorPtr);
- }
+ if (charsToRead > 0 && numChars > charsToRead) {
- dstRead = dstWrote;
- if (TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead) != 0) {
- /*
- * Hit EOF char. How many bytes of src correspond to where the EOF was
- * located in dst? Run the conversion again with an output buffer just
- * big enough to hold the data so we can get the correct value for
- * srcRead.
- */
+ /*
+ * We read more chars than allowed. Reset limits to
+ * prevent that and try again.
+ */
- if (dstWrote == 0) {
- return -1;
+ dstLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) - dst;
+ statePtr->flags = savedFlags;
+ statePtr->inputEncodingFlags = savedIEFlags;
+ statePtr->inputEncodingState = savedState;
+ continue;
}
- statePtr->inputEncodingState = oldState;
- Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen,
- statePtr->inputEncodingFlags, &statePtr->inputEncodingState,
- dst, dstRead + TCL_UTF_MAX, &srcRead, &dstWrote, &numChars);
- TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead);
- }
- /*
- * The number of characters that we got may be less than the number that
- * we started with because "\r\n" sequences may have been turned into just
- * '\n' in dst.
- */
+ if (dstWrote == 0) {
- numChars -= (dstRead - dstWrote);
+ /*
+ * We were not able to read any chars. Maybe there were
+ * not enough src bytes to decode into a char. Maybe
+ * a lone \r could not be translated (crlf mode). Need
+ * to combine any unused src bytes we have in the first
+ * buffer with subsequent bytes to try again.
+ */
- if ((unsigned) numChars > (unsigned) toRead) {
- /*
- * Got too many chars.
- */
+ ChannelBuffer *nextPtr = bufPtr->nextPtr;
+
+ if (nextPtr == NULL) {
+ if (srcLen > 0) {
+ SetFlag(statePtr, CHANNEL_NEED_MORE_DATA);
+ }
+ Tcl_SetObjLength(objPtr, numBytes);
+ return -1;
+ }
- const char *eof;
+ /*
+ * Space is made at the beginning of the buffer to copy the
+ * previous unused bytes there. Check first if the buffer we
+ * are using actually has enough space at its beginning for
+ * the data we are copying. Because if not we will write over
+ * the buffer management information, especially the 'nextPtr'.
+ *
+ * Note that the BUFFER_PADDING (See AllocChannelBuffer) is
+ * used to prevent exactly this situation. I.e. it should never
+ * happen. Therefore it is ok to panic should it happen despite
+ * the precautions.
+ */
- eof = Tcl_UtfAtIndex(dst, toRead);
- statePtr->inputEncodingState = oldState;
- Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen,
- statePtr->inputEncodingFlags, &statePtr->inputEncodingState,
- dst, eof - dst + TCL_UTF_MAX, &srcRead, &dstWrote, &numChars);
- dstRead = dstWrote;
- TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead);
- numChars -= (dstRead - dstWrote);
- }
- statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;
+ if (nextPtr->nextRemoved - srcLen < 0) {
+ Tcl_Panic("Buffer Underflow, BUFFER_PADDING not enough");
+ }
- bufPtr->nextRemoved += srcRead;
- if (dstWrote > srcRead + 1) {
- *factorPtr = dstWrote * UTF_EXPANSION_FACTOR / srcRead;
+ nextPtr->nextRemoved -= srcLen;
+ memcpy(RemovePoint(nextPtr), src, (size_t) srcLen);
+ RecycleBuffer(statePtr, bufPtr, 0);
+ statePtr->inQueueHead = nextPtr;
+ Tcl_SetObjLength(objPtr, numBytes);
+ return ReadChars(statePtr, objPtr, charsToRead, factorPtr);
+ }
+
+ statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;
+
+ bufPtr->nextRemoved += srcRead;
+ if (dstWrote > srcRead + 1) {
+ *factorPtr = dstWrote * UTF_EXPANSION_FACTOR / srcRead;
+ }
+ Tcl_SetObjLength(objPtr, numBytes + dstWrote);
+ return numChars;
}
- *offsetPtr += dstWrote;
- return numChars;
}
/*
@@ -5700,7 +5655,7 @@ ReadChars(
*---------------------------------------------------------------------------
*/
-static int
+static void
TranslateInputEOL(
ChannelState *statePtr, /* Channel being read, for EOL translation and
* EOF character. */
@@ -5709,133 +5664,138 @@ TranslateInputEOL(
* characters. */
const char *srcStart, /* Source characters. */
int *dstLenPtr, /* On entry, the maximum length of output
- * buffer in bytes; must be <= *srcLenPtr. On
- * exit, the number of bytes actually used in
- * output buffer. */
+ * buffer in bytes. On exit, the number of
+ * bytes actually used in output buffer. */
int *srcLenPtr) /* On entry, the length of source buffer. On
* exit, the number of bytes read from the
* source buffer. */
{
- int dstLen, srcLen, inEofChar;
- const char *eof;
+ const char *eof = NULL;
+ int dstLen = *dstLenPtr;
+ int srcLen = *srcLenPtr;
+ int inEofChar = statePtr->inEofChar;
- dstLen = *dstLenPtr;
+ /*
+ * Depending on the translation mode in use, there's no need
+ * to scan more srcLen bytes at srcStart than can possibly transform
+ * to dstLen bytes. This keeps the scan for eof char below from
+ * being pointlessly long.
+ */
+
+ switch (statePtr->inputTranslation) {
+ case TCL_TRANSLATE_LF:
+ case TCL_TRANSLATE_CR:
+ if (srcLen > dstLen) {
+ /* In these modes, each src byte become a dst byte. */
+ srcLen = dstLen;
+ }
+ break;
+ default:
+ /* In other modes, at most 2 src bytes become a dst byte. */
+ if (srcLen > 2 * dstLen) {
+ srcLen = 2 * dstLen;
+ }
+ break;
+ }
- eof = NULL;
- inEofChar = statePtr->inEofChar;
if (inEofChar != '\0') {
/*
- * Find EOF in translated buffer then compress out the EOL. The source
- * buffer may be much longer than the destination buffer - we only
- * want to return EOF if the EOF has been copied to the destination
- * buffer.
+ * Make sure we do not read past any logical end of channel input
+ * created by the presence of the input eof char.
*/
- const char *src, *srcMax;
-
- srcMax = srcStart + *srcLenPtr;
- for (src = srcStart; src < srcMax; src++) {
- if (*src == inEofChar) {
- eof = src;
- srcLen = src - srcStart;
- if (srcLen < dstLen) {
- dstLen = srcLen;
- }
- *srcLenPtr = srcLen;
- break;
- }
+ if ((eof = memchr(srcStart, inEofChar, srcLen))) {
+ srcLen = eof - srcStart;
}
}
+
switch (statePtr->inputTranslation) {
case TCL_TRANSLATE_LF:
+ case TCL_TRANSLATE_CR:
if (dstStart != srcStart) {
- memcpy(dstStart, srcStart, (size_t) dstLen);
+ memcpy(dstStart, srcStart, (size_t) srcLen);
}
- srcLen = dstLen;
- break;
- case TCL_TRANSLATE_CR: {
- char *dst, *dstEnd;
+ if (statePtr->inputTranslation == TCL_TRANSLATE_CR) {
+ char *dst = dstStart;
+ char *dstEnd = dstStart + srcLen;
- if (dstStart != srcStart) {
- memcpy(dstStart, srcStart, (size_t) dstLen);
- }
- dstEnd = dstStart + dstLen;
- for (dst = dstStart; dst < dstEnd; dst++) {
- if (*dst == '\r') {
- *dst = '\n';
+ while ((dst = memchr(dst, '\r', dstEnd - dst))) {
+ *dst++ = '\n';
}
}
- srcLen = dstLen;
+ dstLen = srcLen;
break;
- }
case TCL_TRANSLATE_CRLF: {
- char *dst;
- const char *src, *srcEnd, *srcMax;
-
- dst = dstStart;
- src = srcStart;
- srcEnd = srcStart + dstLen;
- srcMax = srcStart + *srcLenPtr;
-
- for ( ; src < srcEnd; ) {
- if (*src == '\r') {
- src++;
- if (src >= srcMax) {
- SetFlag(statePtr, INPUT_NEED_NL);
- } else if (*src == '\n') {
- *dst++ = *src++;
- } else {
+ const char *crFound, *src = srcStart;
+ char *dst = dstStart;
+ int lesser = (dstLen < srcLen) ? dstLen : srcLen;
+
+ while ((crFound = memchr(src, '\r', lesser))) {
+ int numBytes = crFound - src;
+ memmove(dst, src, numBytes);
+
+ dst += numBytes; dstLen -= numBytes;
+ src += numBytes; srcLen -= numBytes;
+ if (srcLen == 1) {
+ /* valid src bytes end in \r */
+ if (eof) {
*dst++ = '\r';
+ src++; srcLen--;
+ } else {
+ lesser = 0;
+ break;
}
+ } else if (src[1] == '\n') {
+ *dst++ = '\n';
+ src += 2; srcLen -= 2;
} else {
- *dst++ = *src++;
+ *dst++ = '\r';
+ src++; srcLen--;
}
+ dstLen--;
+ lesser = (dstLen < srcLen) ? dstLen : srcLen;
}
- srcLen = src - srcStart;
- dstLen = dst - dstStart;
+ memmove(dst, src, lesser);
+ srcLen = src + lesser - srcStart;
+ dstLen = dst + lesser - dstStart;
break;
}
case TCL_TRANSLATE_AUTO: {
- char *dst;
- const char *src, *srcEnd, *srcMax;
+ const char *crFound, *src = srcStart;
+ char *dst = dstStart;
+ int lesser;
- dst = dstStart;
- src = srcStart;
- srcEnd = srcStart + dstLen;
- srcMax = srcStart + *srcLenPtr;
-
- if (GotFlag(statePtr, INPUT_SAW_CR) && (src < srcMax)) {
- if (*src == '\n') {
- src++;
- }
+ if (GotFlag(statePtr, INPUT_SAW_CR) && srcLen) {
+ if (*src == '\n') { src++; srcLen--; }
ResetFlag(statePtr, INPUT_SAW_CR);
}
- for ( ; src < srcEnd; ) {
- if (*src == '\r') {
- src++;
- if (src >= srcMax) {
- SetFlag(statePtr, INPUT_SAW_CR);
- } else if (*src == '\n') {
- if (srcEnd < srcMax) {
- srcEnd++;
- }
- src++;
- }
- *dst++ = '\n';
- } else {
- *dst++ = *src++;
+ lesser = (dstLen < srcLen) ? dstLen : srcLen;
+ while ((crFound = memchr(src, '\r', lesser))) {
+ int numBytes = crFound - src;
+ memmove(dst, src, numBytes);
+
+ dst[numBytes] = '\n';
+ dst += numBytes + 1; dstLen -= numBytes + 1;
+ src += numBytes + 1; srcLen -= numBytes + 1;
+ if (srcLen == 0) {
+ SetFlag(statePtr, INPUT_SAW_CR);
+ } else if (*src == '\n') {
+ src++; srcLen--;
}
+ lesser = (dstLen < srcLen) ? dstLen : srcLen;
}
- srcLen = src - srcStart;
- dstLen = dst - dstStart;
+ memmove(dst, src, lesser);
+ srcLen = src + lesser - srcStart;
+ dstLen = dst + lesser - dstStart;
break;
}
default:
- return 0;
+ Tcl_Panic("unknown input translation %d", statePtr->inputTranslation);
}
*dstLenPtr = dstLen;
+ *srcLenPtr = srcLen;
- if ((eof != NULL) && (srcStart + srcLen >= eof)) {
+ if (srcStart + srcLen == eof) {
/*
* EOF character was seen in EOL translated range. Leave current file
* position pointing at the EOF character, but don't store the EOF
@@ -5844,12 +5804,8 @@ TranslateInputEOL(
SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF);
statePtr->inputEncodingFlags |= TCL_ENCODING_END;
- ResetFlag(statePtr, INPUT_SAW_CR | INPUT_NEED_NL);
- return 1;
+ ResetFlag(statePtr, INPUT_SAW_CR);
}
-
- *srcLenPtr = srcLen;
- return 0;
}
/*
@@ -5903,16 +5859,11 @@ Tcl_Ungets(
statePtr->flags = flags;
/*
- * If we have encountered a sticky EOF, just punt without storing (sticky
- * EOF is set if we have seen the input eofChar, to prevent reading beyond
- * the eofChar). Otherwise, clear the EOF flags, and clear the BLOCKED
- * bit. We want to discover these conditions anew in each operation.
+ * Clear the EOF flags, and clear the BLOCKED bit.
*/
- if (GotFlag(statePtr, CHANNEL_STICKY_EOF)) {
- goto done;
- }
- ResetFlag(statePtr, CHANNEL_BLOCKED | CHANNEL_EOF);
+ ResetFlag(statePtr,
+ CHANNEL_BLOCKED | CHANNEL_STICKY_EOF | CHANNEL_EOF | INPUT_SAW_CR);
bufPtr = AllocChannelBuffer(len);
memcpy(InsertPoint(bufPtr), str, (size_t) len);
@@ -6047,6 +5998,9 @@ DiscardInputQueued(
*
* Reads input data from a device into a channel buffer.
*
+ * IMPORTANT! This routine is only called on a chanPtr argument
+ * that is the top channel of a stack!
+ *
* Results:
* The return value is the Posix error code if an error occurred while
* reading from the file, or 0 otherwise.
@@ -6084,16 +6038,11 @@ GetInput(
* channel in the stack and use them. They can be the result of a
* transformation which went away without reading all the information
* placed in the area when it was stacked.
- *
- * Two possibilities for the state: No buffers in it, or a single empty
- * buffer. In the latter case we can recycle it now.
*/
if (chanPtr->inQueueHead != NULL) {
- if (statePtr->inQueueHead != NULL) {
- RecycleBuffer(statePtr, statePtr->inQueueHead, 0);
- statePtr->inQueueHead = NULL;
- }
+
+ assert(statePtr->inQueueHead == NULL);
statePtr->inQueueHead = chanPtr->inQueueHead;
statePtr->inQueueTail = chanPtr->inQueueTail;
@@ -6122,12 +6071,12 @@ GetInput(
/*
* Check the actual buffersize against the requested buffersize.
- * Buffers which are smaller than requested are squashed. This is done
+ * Saved buffers of the wrong size are squashed. This is done
* to honor dynamic changes of the buffersize made by the user.
*/
if ((bufPtr != NULL)
- && (bufPtr->bufLength - BUFFER_PADDING < statePtr->bufSize)) {
+ && (bufPtr->bufLength - BUFFER_PADDING != statePtr->bufSize)) {
ReleaseChannelBuffer(bufPtr);
bufPtr = NULL;
}
@@ -6137,21 +6086,8 @@ GetInput(
}
bufPtr->nextPtr = NULL;
- /*
- * SF #427196: Use the actual size of the buffer to determine the
- * number of bytes to read from the channel and not the size for new
- * buffers. They can be different if the buffersize was changed
- * between reads.
- *
- * Note: This affects performance negatively if the buffersize was
- * extended but this small buffer is reused for all subsequent reads.
- * The system never uses buffers with the requested bigger size in
- * that case. An adjunct patch could try and delete all unused buffers
- * it encounters and which are smaller than the formally requested
- * buffersize.
- */
-
toRead = SpaceLeft(bufPtr);
+ assert(toRead == statePtr->bufSize);
if (statePtr->inQueueTail == NULL) {
statePtr->inQueueHead = bufPtr;
@@ -6162,6 +6098,7 @@ GetInput(
}
/*
+ * TODO - consider escape before buffer alloc
* If EOF is set, we should avoid calling the driver because on some
* platforms it is impossible to read from a device after EOF.
*/
@@ -6170,33 +6107,8 @@ GetInput(
return 0;
}
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
- /*
- * [SF Tcl Bug 943274]. Better emulation of non-blocking channels for
- * channels without BlockModeProc, by keeping track of true fileevents
- * generated by the OS == Data waiting and reading if and only if we are
- * sure to have data.
- */
-
- if (GotFlag(statePtr, CHANNEL_NONBLOCKING) &&
- (Tcl_ChannelBlockModeProc(chanPtr->typePtr) == NULL) &&
- !GotFlag(statePtr, CHANNEL_HAS_MORE_DATA)) {
- /*
- * Bypass the driver, it would block, as no data is available
- */
-
- nread = -1;
- result = EWOULDBLOCK;
- } else {
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
-
- PreserveChannelBuffer(bufPtr);
- nread = ChanRead(chanPtr, InsertPoint(bufPtr), toRead, &result);
-
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
- }
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
-
+ PreserveChannelBuffer(bufPtr);
+ nread = ChanRead(chanPtr, InsertPoint(bufPtr), toRead, &result);
if (nread > 0) {
result = 0;
bufPtr->nextAdded += nread;
@@ -6211,18 +6123,6 @@ GetInput(
if (nread < toRead) {
SetFlag(statePtr, CHANNEL_BLOCKED);
}
-
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
- if (nread <= toRead) {
- /*
- * [SF Tcl Bug 943274] We have read the available data, clear
- * flag.
- */
-
- ResetFlag(statePtr, CHANNEL_HAS_MORE_DATA);
- }
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
-
} else if (nread == 0) {
result = 0;
SetFlag(statePtr, CHANNEL_EOF);
@@ -6702,7 +6602,7 @@ CheckChannelErrors(
* Fail if the channel is not opened for desired operation.
*/
- if ((statePtr->flags & direction) == 0) {
+ if (GotFlag(statePtr, direction) == 0) {
Tcl_SetErrno(EACCES);
return -1;
}
@@ -6722,15 +6622,10 @@ CheckChannelErrors(
if (direction == TCL_READABLE) {
/*
- * If we have not encountered a sticky EOF, clear the EOF bit (sticky
- * EOF is set if we have seen the input eofChar, to prevent reading
- * beyond the eofChar). Also, always clear the BLOCKED bit. We want to
- * discover these conditions anew in each operation.
+ * Clear the BLOCKED bit. We want to discover this condition
+ * anew in each operation.
*/
- if (!GotFlag(statePtr, CHANNEL_STICKY_EOF)) {
- ResetFlag(statePtr, CHANNEL_EOF);
- }
ResetFlag(statePtr, CHANNEL_BLOCKED | CHANNEL_NEED_MORE_DATA);
}
@@ -6947,7 +6842,27 @@ Tcl_SetChannelBufferSize(
}
statePtr = ((Channel *) chan)->state;
+
+ if (statePtr->bufSize == sz) {
+ return;
+ }
statePtr->bufSize = sz;
+
+ /*
+ * If bufsize changes, need to get rid of old utility buffer.
+ */
+
+ if (statePtr->saveInBufPtr != NULL) {
+ RecycleBuffer(statePtr, statePtr->saveInBufPtr, 1);
+ statePtr->saveInBufPtr = NULL;
+ }
+ if ((statePtr->inQueueHead != NULL)
+ && (statePtr->inQueueHead->nextPtr == NULL)
+ && IsBufferEmpty(statePtr->inQueueHead)) {
+ RecycleBuffer(statePtr, statePtr->inQueueHead, 1);
+ statePtr->inQueueHead = NULL;
+ statePtr->inQueueTail = NULL;
+ }
}
/*
@@ -7381,6 +7296,7 @@ Tcl_SetChannelOption(
return TCL_ERROR;
}
Tcl_SetChannelBufferSize(chan, newBufferSize);
+ return TCL_OK;
} else if (HaveOpt(2, "-encoding")) {
Tcl_Encoding encoding;
@@ -7412,6 +7328,7 @@ Tcl_SetChannelOption(
statePtr->outputEncodingFlags = TCL_ENCODING_START;
ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA);
UpdateInterest(chanPtr);
+ return TCL_OK;
} else if (HaveOpt(2, "-eofchar")) {
if (Tcl_SplitList(interp, newValue, &argc, &argv) == TCL_ERROR) {
return TCL_ERROR;
@@ -7573,22 +7490,6 @@ Tcl_SetChannelOption(
return Tcl_BadChannelOption(interp, optionName, NULL);
}
- /*
- * If bufsize changes, need to get rid of old utility buffer.
- */
-
- if (statePtr->saveInBufPtr != NULL) {
- RecycleBuffer(statePtr, statePtr->saveInBufPtr, 1);
- statePtr->saveInBufPtr = NULL;
- }
- if ((statePtr->inQueueHead != NULL)
- && (statePtr->inQueueHead->nextPtr == NULL)
- && IsBufferEmpty(statePtr->inQueueHead)) {
- RecycleBuffer(statePtr, statePtr->inQueueHead, 1);
- statePtr->inQueueHead = NULL;
- statePtr->inQueueTail = NULL;
- }
-
return TCL_OK;
}
@@ -7680,21 +7581,6 @@ Tcl_NotifyChannel(
Channel *upChanPtr;
const Tcl_ChannelType *upTypePtr;
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
- /*
- * [SF Tcl Bug 943274] For a non-blocking channel without blockmodeproc we
- * keep track of actual input coming from the OS so that we can do a
- * credible imitation of non-blocking behaviour.
- */
-
- if ((mask & TCL_READABLE) &&
- GotFlag(statePtr, CHANNEL_NONBLOCKING) &&
- (Tcl_ChannelBlockModeProc(chanPtr->typePtr) == NULL) &&
- !GotFlag(statePtr, CHANNEL_TIMER_FEV)) {
- SetFlag(statePtr, CHANNEL_HAS_MORE_DATA);
- }
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
-
/*
* In contrast to the other API functions this procedure walks towards the
* top of a stack and not down from it.
@@ -7934,29 +7820,8 @@ ChannelTimerProc(
*/
statePtr->timer = Tcl_CreateTimerHandler(0, ChannelTimerProc,chanPtr);
-
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
- /*
- * Set the TIMER flag to notify the higher levels that the driver
- * might have no data for us. We do this only if we are in
- * non-blocking mode and the driver has no BlockModeProc because only
- * then we really don't know if the driver will block or not. A
- * similar test is done in "PeekAhead".
- */
-
- if (GotFlag(statePtr, CHANNEL_NONBLOCKING) &&
- (Tcl_ChannelBlockModeProc(chanPtr->typePtr) == NULL)) {
- SetFlag(statePtr, CHANNEL_TIMER_FEV);
- }
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
-
Tcl_Preserve(statePtr);
Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE);
-
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
- ResetFlag(statePtr, CHANNEL_TIMER_FEV);
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
-
Tcl_Release(statePtr);
} else {
statePtr->timer = NULL;
@@ -8369,7 +8234,7 @@ Tcl_FileEventObjCmd(
}
chanPtr = (Channel *) chan;
statePtr = chanPtr->state;
- if ((statePtr->flags & mask) == 0) {
+ if (GotFlag(statePtr, mask) == 0) {
Tcl_AppendResult(interp, "channel is not ",
(mask == TCL_READABLE) ? "readable" : "writable", NULL);
return TCL_ERROR;
@@ -8879,13 +8744,24 @@ CopyData(
*
* DoRead --
*
- * Reads a given number of bytes from a channel.
+ * Stores up to "bytesToRead" bytes in memory pointed to by "dst".
+ * These bytes come from reading the channel "chanPtr" and
+ * performing the configured translations.
*
* No encoding conversions are applied to the bytes being read.
*
* Results:
- * The number of characters read, or -1 on error. Use Tcl_GetErrno() to
- * retrieve the error code for the error that occurred.
+ * The number of bytes actually stored (<= bytesToRead),
+ * or -1 if there is an error in reading the channel. Use
+ * Tcl_GetErrno() to retrieve the error code for the error
+ * that occurred.
+ *
+ * The number of bytes stored can be less than the number
+ * requested when
+ * - EOF is reached on the channel; or
+ * - the channel is non-blocking, and we've read all we can
+ * without blocking.
+ * - a channel reading error occurs (and we return -1)
*
* Side effects:
* May cause input to be buffered.
@@ -8896,290 +8772,149 @@ CopyData(
static int
DoRead(
Channel *chanPtr, /* The channel from which to read. */
- char *bufPtr, /* Where to store input read. */
- int toRead) /* Maximum number of bytes to read. */
+ char *dst, /* Where to store input read. */
+ int bytesToRead) /* Maximum number of bytes to read. */
{
ChannelState *statePtr = chanPtr->state;
- /* State info for channel */
- int copied; /* How many characters were copied into the
- * result string? */
- int copiedNow; /* How many characters were copied from the
- * current input buffer? */
- int result; /* Of calling GetInput. */
-
- /*
- * If we have not encountered a sticky EOF, clear the EOF bit. Either way
- * clear the BLOCKED bit. We want to discover these anew during each
- * operation.
- */
+ char *p = dst;
Tcl_Preserve(chanPtr);
- if (!GotFlag(statePtr, CHANNEL_STICKY_EOF)) {
- ResetFlag(statePtr, CHANNEL_EOF);
- }
- ResetFlag(statePtr, CHANNEL_BLOCKED | CHANNEL_NEED_MORE_DATA);
-
- for (copied = 0; copied < toRead; copied += copiedNow) {
- copiedNow = CopyAndTranslateBuffer(statePtr, bufPtr + copied,
- toRead - copied);
- if (copiedNow == 0) {
- if (GotFlag(statePtr, CHANNEL_EOF)) {
- goto done;
- }
- if (GotFlag(statePtr, CHANNEL_BLOCKED)) {
- if (GotFlag(statePtr, CHANNEL_NONBLOCKING)) {
- goto done;
- }
- ResetFlag(statePtr, CHANNEL_BLOCKED);
- }
- result = GetInput(chanPtr);
- if (result != 0) {
- if (result != EAGAIN) {
- copied = -1;
- }
- goto done;
- }
- }
- }
-
- ResetFlag(statePtr, CHANNEL_BLOCKED);
-
- /*
- * Update the notifier state so we don't block while there is still data
- * in the buffers.
- */
-
- done:
- UpdateInterest(chanPtr);
- Tcl_Release(chanPtr);
- return copied;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * CopyAndTranslateBuffer --
- *
- * Copy at most one buffer of input to the result space, doing eol
- * translations according to mode in effect currently.
- *
- * Results:
- * Number of bytes stored in the result buffer (as opposed to the number
- * of bytes read from the channel). May return zero if no input is
- * available to be translated.
- *
- * Side effects:
- * Consumes buffered input. May deallocate one buffer.
- *
- *----------------------------------------------------------------------
- */
-
-static int
-CopyAndTranslateBuffer(
- ChannelState *statePtr, /* Channel state from which to read input. */
- char *result, /* Where to store the copied input. */
- int space) /* How many bytes are available in result to
- * store the copied input? */
-{
- ChannelBuffer *bufPtr; /* The buffer from which to copy bytes. */
- int bytesInBuffer; /* How many bytes are available to be copied
- * in the current input buffer? */
- int copied; /* How many characters were already copied
- * into the destination space? */
- int i; /* Iterates over the copied input looking for
- * the input eofChar. */
-
- /*
- * If there is no input at all, return zero. The invariant is that either
- * there is no buffer in the queue, or if the first buffer is empty, it is
- * also the last buffer (and thus there is no input in the queue). Note
- * also that if the buffer is empty, we leave it in the queue.
- */
-
- if (statePtr->inQueueHead == NULL) {
- return 0;
- }
- bufPtr = statePtr->inQueueHead;
- bytesInBuffer = BytesLeft(bufPtr);
+ while (bytesToRead) {
+ /*
+ * Each pass through the loop is intended to process up to
+ * one channel buffer.
+ */
- copied = 0;
- switch (statePtr->inputTranslation) {
- case TCL_TRANSLATE_LF:
- if (bytesInBuffer == 0) {
- return 0;
- }
+ int bytesRead, bytesWritten;
+ ChannelBuffer *bufPtr = statePtr->inQueueHead;
/*
- * Copy the current chunk into the result buffer.
+ * When there's no buffered data to read, and we're at EOF,
+ * escape to the caller.
*/
- if (bytesInBuffer < space) {
- space = bytesInBuffer;
+ if (GotFlag(statePtr, CHANNEL_EOF)
+ && (bufPtr == NULL || IsBufferEmpty(bufPtr))) {
+ break;
}
- memcpy(result, RemovePoint(bufPtr), (size_t) space);
- bufPtr->nextRemoved += space;
- copied = space;
- break;
- case TCL_TRANSLATE_CR: {
- char *end;
- if (bytesInBuffer == 0) {
- return 0;
- }
+ /* If there is no full buffer, attempt to create and/or fill one. */
- /*
- * Copy the current chunk into the result buffer, then replace all \r
- * with \n.
- */
+ while (bufPtr == NULL || !IsBufferFull(bufPtr)) {
+ int code;
- if (bytesInBuffer < space) {
- space = bytesInBuffer;
- }
- memcpy(result, RemovePoint(bufPtr), (size_t) space);
- bufPtr->nextRemoved += space;
- copied = space;
+ ResetFlag(statePtr, CHANNEL_BLOCKED);
+ moreData:
+ code = GetInput(chanPtr);
+ bufPtr = statePtr->inQueueHead;
- for (end = result + copied; result < end; result++) {
- if (*result == '\r') {
- *result = '\n';
+ assert (bufPtr != NULL);
+
+ if (GotFlag(statePtr, CHANNEL_EOF|CHANNEL_BLOCKED)) {
+ /* Further reads cannot do any more */
+ break;
}
+
+ if (code) {
+ /* Read error */
+ UpdateInterest(chanPtr);
+ Tcl_Release(chanPtr);
+ return -1;
+ }
+
+ assert (IsBufferFull(bufPtr));
}
- break;
- }
- case TCL_TRANSLATE_CRLF: {
- char *src, *end, *dst;
- int curByte;
- /*
- * If there is a held-back "\r" at EOF, produce it now.
- */
+ assert (bufPtr != NULL);
- if (bytesInBuffer == 0) {
- if ((statePtr->flags & (INPUT_SAW_CR | CHANNEL_EOF)) ==
- (INPUT_SAW_CR | CHANNEL_EOF)) {
- result[0] = '\r';
- ResetFlag(statePtr, INPUT_SAW_CR);
- return 1;
- }
- return 0;
- }
+ bytesRead = BytesLeft(bufPtr);
+ bytesWritten = bytesToRead;
- /*
- * Copy the current chunk and replace "\r\n" with "\n" (but not
- * standalone "\r"!).
- */
+ TranslateInputEOL(statePtr, p, RemovePoint(bufPtr),
+ &bytesWritten, &bytesRead);
+ bufPtr->nextRemoved += bytesRead;
+ p += bytesWritten;
+ bytesToRead -= bytesWritten;
- if (bytesInBuffer < space) {
- space = bytesInBuffer;
- }
- memcpy(result, RemovePoint(bufPtr), (size_t) space);
- bufPtr->nextRemoved += space;
- copied = space;
+ if (!IsBufferEmpty(bufPtr)) {
+ /*
+ * Buffer is not empty. How can that be?
+ *
+ * 0) We stopped early because we got all the bytes
+ * we were seeking. That's fine.
+ */
- end = result + copied;
- dst = result;
- for (src = result; src < end; src++) {
- curByte = *src;
- if (curByte == '\n') {
- ResetFlag(statePtr, INPUT_SAW_CR);
- } else if (GotFlag(statePtr, INPUT_SAW_CR)) {
- ResetFlag(statePtr, INPUT_SAW_CR);
- *dst = '\r';
- dst++;
+ if (bytesToRead == 0) {
+ UpdateInterest(chanPtr);
+ break;
}
- if (curByte == '\r') {
- SetFlag(statePtr, INPUT_SAW_CR);
- } else {
- *dst = (char) curByte;
- dst++;
+
+ /*
+ * 1) We're @EOF because we saw eof char.
+ */
+
+ if (statePtr->inEofChar
+ && RemovePoint(bufPtr)[0] == statePtr->inEofChar) {
+ UpdateInterest(chanPtr);
+ break;
}
- }
- copied = dst - result;
- break;
- }
- case TCL_TRANSLATE_AUTO: {
- char *src, *end, *dst;
- int curByte;
- if (bytesInBuffer == 0) {
- return 0;
- }
+ /*
+ * 2) The buffer holds a \r while in CRLF translation,
+ * followed by the end of the buffer.
+ */
- /*
- * Loop over the current buffer, converting "\r" and "\r\n" to "\n".
- */
+ assert(statePtr->inputTranslation == TCL_TRANSLATE_CRLF);
+ assert(RemovePoint(bufPtr)[0] == '\r');
+ assert(BytesLeft(bufPtr) == 1);
- if (bytesInBuffer < space) {
- space = bytesInBuffer;
- }
- memcpy(result, RemovePoint(bufPtr), (size_t) space);
- bufPtr->nextRemoved += space;
- copied = space;
+ if (bufPtr->nextPtr == NULL) {
+ /* There's no more buffered data.... */
- end = result + copied;
- dst = result;
- for (src = result; src < end; src++) {
- curByte = *src;
- if (curByte == '\r') {
- SetFlag(statePtr, INPUT_SAW_CR);
- *dst = '\n';
- dst++;
- } else {
- if ((curByte != '\n') || !GotFlag(statePtr, INPUT_SAW_CR)) {
- *dst = (char) curByte;
- dst++;
+ if (GotFlag(statePtr, CHANNEL_EOF)) {
+ /* ...and there never will be. */
+
+ *p++ = '\r';
+ bytesToRead--;
+ bufPtr->nextRemoved++;
+ } else if (GotFlag(statePtr, CHANNEL_BLOCKED)) {
+ /* ...and we cannot get more now. */
+ SetFlag(statePtr, CHANNEL_NEED_MORE_DATA);
+ UpdateInterest(chanPtr);
+ break;
+ } else {
+ /* ... so we need to get some. */
+ goto moreData;
}
- ResetFlag(statePtr, INPUT_SAW_CR);
}
- }
- copied = dst - result;
- break;
- }
- default:
- Tcl_Panic("unknown eol translation mode");
- }
- /*
- * If an in-stream EOF character is set for this channel, check that the
- * input we copied so far does not contain the EOF char. If it does, copy
- * only up to and excluding that character.
- */
+ if (bufPtr->nextPtr) {
+ /* There's a next buffer. Shift orphan \r to it. */
- if (statePtr->inEofChar != 0) {
- for (i = 0; i < copied; i++) {
- if (result[i] == (char) statePtr->inEofChar) {
- /*
- * Set sticky EOF so that no further input is presented to the
- * caller.
- */
+ ChannelBuffer *nextPtr = bufPtr->nextPtr;
- SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF);
- statePtr->inputEncodingFlags |= TCL_ENCODING_END;
- copied = i;
- break;
+ nextPtr->nextRemoved -= 1;
+ RemovePoint(nextPtr)[0] = '\r';
+ bufPtr->nextRemoved++;
}
}
- }
- /*
- * If the current buffer is empty recycle it.
- */
+ if (IsBufferEmpty(bufPtr)) {
+ statePtr->inQueueHead = bufPtr->nextPtr;
+ if (statePtr->inQueueHead == NULL) {
+ statePtr->inQueueTail = NULL;
+ }
+ RecycleBuffer(statePtr, bufPtr, 0);
+ }
- if (IsBufferEmpty(bufPtr)) {
- statePtr->inQueueHead = bufPtr->nextPtr;
- if (statePtr->inQueueHead == NULL) {
- statePtr->inQueueTail = NULL;
+ if (GotFlag(statePtr, CHANNEL_NONBLOCKING)
+ && GotFlag(statePtr, CHANNEL_BLOCKED)) {
+ break;
}
- RecycleBuffer(statePtr, bufPtr, 0);
}
- /*
- * Return the number of characters copied into the result buffer. This may
- * be different from the number of bytes consumed, because of EOL
- * translations.
- */
-
- return copied;
+ Tcl_Release(chanPtr);
+ return (int)(p - dst);
}
/*
@@ -10542,6 +10277,7 @@ SetChannelFromAny(
}
if (objPtr->typePtr == &chanObjType) {
/*
+ * TODO: TAINT Flag and dup'd channel values?
* The channel is valid until any call to DetachChannel occurs.
* Ensure consistency checks are done.
*/
@@ -10624,13 +10360,8 @@ DumpFlags(
ChanFlag('S', CHANNEL_STICKY_EOF);
ChanFlag('B', CHANNEL_BLOCKED);
ChanFlag('/', INPUT_SAW_CR);
- ChanFlag('*', INPUT_NEED_NL);
ChanFlag('D', CHANNEL_DEAD);
ChanFlag('R', CHANNEL_RAW_MODE);
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
- ChanFlag('T', CHANNEL_TIMER_FEV);
- ChanFlag('H', CHANNEL_HAS_MORE_DATA);
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
ChanFlag('x', CHANNEL_INCLOSE);
buf[i] ='\0';
diff --git a/generic/tclIO.h b/generic/tclIO.h
index 0ea7d1c..b8fb5be 100644
--- a/generic/tclIO.h
+++ b/generic/tclIO.h
@@ -253,9 +253,6 @@ typedef struct ChannelState {
#define INPUT_SAW_CR (1<<12) /* Channel is in CRLF eol input
* translation mode and the last byte
* seen was a "\r". */
-#define INPUT_NEED_NL (1<<15) /* Saw a '\r' at end of last buffer,
- * and there should be a '\n' at
- * beginning of next buffer. */
#define CHANNEL_DEAD (1<<13) /* The channel has been closed by the
* exit handler (on exit) but not
* deallocated. When any IO operation
@@ -275,29 +272,6 @@ typedef struct ChannelState {
* changes. */
#define CHANNEL_RAW_MODE (1<<16) /* When set, notes that the Raw API is
* being used. */
-#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING
-#define CHANNEL_TIMER_FEV (1<<17) /* When set the event we are notified
- * by is a fileevent generated by a
- * timer. We don't know if the driver
- * has more data and should not try to
- * read from it. If the system needs
- * more than is in the buffers out
- * read routines will simulate a short
- * read (0 characters read) */
-#define CHANNEL_HAS_MORE_DATA (1<<18) /* Set by NotifyChannel for a channel
- * if and only if the channel is
- * configured non-blocking, the driver
- * for said channel has no
- * blockmodeproc, and data has arrived
- * for reading at the OS level). A
- * GetInput will pass reading from the
- * driver if the channel is
- * non-blocking, without blockmode
- * proc and the flag has not been set.
- * A read will be performed if the
- * flag is set. This will reset the
- * flag as well. */
-#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */
#define CHANNEL_INCLOSE (1<<19) /* Channel is currently being closed.
* Its structures are still live and
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 2353450..b30650d 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -2480,6 +2480,8 @@ MODULE_SCOPE char tclEmptyString;
*----------------------------------------------------------------
*/
+MODULE_SCOPE void TclAppendBytesToByteArray(Tcl_Obj *objPtr,
+ const unsigned char *bytes, int len);
MODULE_SCOPE void TclAdvanceContinuations(int* line, int** next, int loc);
MODULE_SCOPE void TclAdvanceLines(int *line, const char *start,
const char *end);
@@ -2571,6 +2573,8 @@ MODULE_SCOPE int TclGetOpenModeEx(Tcl_Interp *interp,
int *binaryPtr);
MODULE_SCOPE Tcl_Obj * TclGetProcessGlobalValue(ProcessGlobalValue *pgvPtr);
MODULE_SCOPE const char *TclGetSrcInfoForCmd(Interp *iPtr, int *lenPtr);
+MODULE_SCOPE char * TclGetStringStorage(Tcl_Obj *objPtr,
+ unsigned int *sizePtr);
MODULE_SCOPE int TclGlob(Tcl_Interp *interp, char *pattern,
Tcl_Obj *unquotedPrefix, int globFlags,
Tcl_GlobTypeData *types);
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index a929d04..8c6a376 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1139,7 +1139,8 @@ Tcl_AppendLimitedToObj(
if (ellipsis == NULL) {
ellipsis = "...";
}
- toCopy = Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes;
+ toCopy = (bytes == NULL) ? limit
+ : Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes;
}
/*
@@ -1386,7 +1387,8 @@ AppendUnicodeToUnicodeRep(
* due to the reallocs below.
*/
int offset = -1;
- if (unicode >= stringPtr->unicode && unicode <= stringPtr->unicode
+ if (unicode && unicode >= stringPtr->unicode
+ && unicode <= stringPtr->unicode
+ stringPtr->uallocated / sizeof(Tcl_UniChar)) {
offset = unicode - stringPtr->unicode;
}
@@ -1405,8 +1407,10 @@ AppendUnicodeToUnicodeRep(
* trailing null.
*/
- memcpy(stringPtr->unicode + stringPtr->numChars, unicode,
- appendNumChars * sizeof(Tcl_UniChar));
+ if (unicode) {
+ memcpy(stringPtr->unicode + stringPtr->numChars, unicode,
+ appendNumChars * sizeof(Tcl_UniChar));
+ }
stringPtr->unicode[numChars] = 0;
stringPtr->numChars = numChars;
stringPtr->allocated = 0;
@@ -1478,8 +1482,8 @@ AppendUtfToUnicodeRep(
int numBytes) /* Number of bytes of "bytes" to convert. */
{
Tcl_DString dsPtr;
- int numChars;
- Tcl_UniChar *unicode;
+ int numChars = numBytes;
+ Tcl_UniChar *unicode = NULL;
if (numBytes < 0) {
numBytes = (bytes ? strlen(bytes) : 0);
@@ -1489,8 +1493,11 @@ AppendUtfToUnicodeRep(
}
Tcl_DStringInit(&dsPtr);
- numChars = Tcl_NumUtfChars(bytes, numBytes);
- unicode = (Tcl_UniChar *)Tcl_UtfToUniCharDString(bytes, numBytes, &dsPtr);
+ if (bytes) {
+ numChars = Tcl_NumUtfChars(bytes, numBytes);
+ unicode = (Tcl_UniChar *) Tcl_UtfToUniCharDString(bytes, numBytes,
+ &dsPtr);
+ }
AppendUnicodeToUnicodeRep(objPtr, unicode, numChars);
Tcl_DStringFree(&dsPtr);
}
@@ -1547,7 +1554,7 @@ AppendUtfToUtfRep(
* due to the reallocs below.
*/
int offset = -1;
- if (bytes >= objPtr->bytes
+ if (bytes && bytes >= objPtr->bytes
&& bytes <= objPtr->bytes + objPtr->length) {
offset = bytes - objPtr->bytes;
}
@@ -1585,7 +1592,9 @@ AppendUtfToUtfRep(
stringPtr->numChars = -1;
stringPtr->hasUnicode = 0;
- memcpy(objPtr->bytes + oldLength, bytes, (size_t) numBytes);
+ if (bytes) {
+ memcpy(objPtr->bytes + oldLength, bytes, (size_t) numBytes);
+ }
objPtr->bytes[newLength] = 0;
objPtr->length = newLength;
}
@@ -2702,6 +2711,38 @@ Tcl_ObjPrintf(
/*
*---------------------------------------------------------------------------
*
+ * TclGetStringStorage --
+ *
+ * Returns the string storage space of a Tcl_Obj.
+ *
+ * Results:
+ * The pointer value objPtr->bytes is returned and the number of bytes
+ * allocated there is written to *sizePtr (if known).
+ *
+ * Side effects:
+ * May set objPtr->bytes.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+char *
+TclGetStringStorage(
+ Tcl_Obj *objPtr,
+ unsigned int *sizePtr)
+{
+ String *stringPtr;
+
+ if (objPtr->typePtr != &tclStringType || objPtr->bytes == NULL) {
+ return TclGetStringFromObj(objPtr, (int *)sizePtr);
+ }
+
+ stringPtr = GET_STRING(objPtr);
+ *sizePtr = stringPtr->allocated;
+ return objPtr->bytes;
+}
+/*
+ *---------------------------------------------------------------------------
+ *
* TclStringObjReverse --
*
* Implements the [string reverse] operation.
diff --git a/tests/io.test b/tests/io.test
index 4791280..7707a28 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -4729,7 +4729,7 @@ test io-35.18a {Tcl_Eof, eof char, cr write, crlf read} -body {
close $f
list $s $l $e [scan [string index $in end] %c]
} -result {9 8 1 13}
-test io-35.18b {Tcl_Eof, eof char, cr write, crlf read} -constraints knownBug -body {
+test io-35.18b {Tcl_Eof, eof char, cr write, crlf read} -body {
file delete $path(test1)
set f [open $path(test1) w]
fconfigure $f -translation cr -eofchar \x1a
@@ -4772,6 +4772,21 @@ test io-35.19 {Tcl_Eof, eof char in middle, cr write, crlf read} -body {
close $f
list $c $l $e [scan [string index $in end] %c]
} -result {17 8 1 13}
+test io-35.20 {Tcl_Eof, eof char in middle, cr write, crlf read} {
+ file delete $path(test1)
+ set f [open $path(test1) w]
+ fconfigure $f -translation cr -eofchar {}
+ set i [format \n%cqrsuvw 26]
+ puts $f $i
+ close $f
+ set c [file size $path(test1)]
+ set f [open $path(test1) r]
+ fconfigure $f -translation crlf -eofchar \x1a
+ set l [string length [set in [read $f]]]
+ set e [eof $f]
+ close $f
+ list $c $l $e [scan [string index $in end] %c]
+} {9 1 1 13}
# Test Tcl_InputBlocked
@@ -6801,6 +6816,147 @@ test io-52.11 {TclCopyChannel & encodings} {fcopy} {
file size $path(kyrillic.txt)
} 3
+test io-52.12 {coverage of -translation auto} {
+ file delete $path(test1) $path(test2)
+ set out [open $path(test1) wb]
+ chan configure $out -translation lf
+ puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz
+ close $out
+ set in [open $path(test1)]
+ chan configure $in -buffersize 8
+ set out [open $path(test2) w]
+ fcopy $in $out
+ close $in
+ close $out
+ file size $path(test2)
+} 29
+test io-52.13 {coverage of -translation cr} {
+ file delete $path(test1) $path(test2)
+ set out [open $path(test1) wb]
+ chan configure $out -translation lf
+ puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz
+ close $out
+ set in [open $path(test1)]
+ chan configure $in -buffersize 8 -translation cr
+ set out [open $path(test2) w]
+ fcopy $in $out
+ close $in
+ close $out
+ file size $path(test2)
+} 30
+test io-52.14 {coverage of -translation crlf} {
+ file delete $path(test1) $path(test2)
+ set out [open $path(test1) wb]
+ chan configure $out -translation lf
+ puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz
+ close $out
+ set in [open $path(test1)]
+ chan configure $in -buffersize 8 -translation crlf
+ set out [open $path(test2) w]
+ fcopy $in $out
+ close $in
+ close $out
+ file size $path(test2)
+} 29
+test io-52.14.1 {coverage of -translation crlf} {
+ file delete $path(test1) $path(test2)
+ set out [open $path(test1) wb]
+ chan configure $out -translation lf
+ puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz
+ close $out
+ set in [open $path(test1)]
+ chan configure $in -buffersize 8 -translation crlf
+ set out [open $path(test2) w]
+ fcopy $in $out -size 2
+ close $in
+ close $out
+ file size $path(test2)
+} 2
+test io-52.14.2 {coverage of -translation crlf} {
+ file delete $path(test1) $path(test2)
+ set out [open $path(test1) wb]
+ chan configure $out -translation lf
+ puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz
+ close $out
+ set in [open $path(test1)]
+ chan configure $in -translation crlf
+ set out [open $path(test2) w]
+ fcopy $in $out -size 9
+ close $in
+ close $out
+ file size $path(test2)
+} 9
+test io-52.15 {coverage of -translation crlf} {
+ file delete $path(test1) $path(test2)
+ set out [open $path(test1) wb]
+ chan configure $out -translation lf
+ puts -nonewline $out abcdefg\r
+ close $out
+ set in [open $path(test1)]
+ chan configure $in -buffersize 8 -translation crlf
+ set out [open $path(test2) w]
+ fcopy $in $out
+ close $in
+ close $out
+ file size $path(test2)
+} 8
+test io-52.16 {coverage of eofChar handling} {
+ file delete $path(test1) $path(test2)
+ set out [open $path(test1) wb]
+ chan configure $out -translation lf
+ puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz
+ close $out
+ set in [open $path(test1)]
+ chan configure $in -buffersize 8 -translation lf -eofchar a
+ set out [open $path(test2) w]
+ fcopy $in $out
+ close $in
+ close $out
+ file size $path(test2)
+} 0
+test io-52.17 {coverage of eofChar handling} {
+ file delete $path(test1) $path(test2)
+ set out [open $path(test1) wb]
+ chan configure $out -translation lf
+ puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz
+ close $out
+ set in [open $path(test1)]
+ chan configure $in -buffersize 8 -translation lf -eofchar d
+ set out [open $path(test2) w]
+ fcopy $in $out
+ close $in
+ close $out
+ file size $path(test2)
+} 3
+test io-52.18 {coverage of eofChar handling} {
+ file delete $path(test1) $path(test2)
+ set out [open $path(test1) wb]
+ chan configure $out -translation lf
+ puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz
+ close $out
+ set in [open $path(test1)]
+ chan configure $in -buffersize 8 -translation crlf -eofchar h
+ set out [open $path(test2) w]
+ fcopy $in $out
+ close $in
+ close $out
+ file size $path(test2)
+} 8
+test io-52.19 {coverage of eofChar handling} {
+ file delete $path(test1) $path(test2)
+ set out [open $path(test1) wb]
+ chan configure $out -translation lf
+ puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz
+ close $out
+ set in [open $path(test1)]
+ chan configure $in -buffersize 10 -translation crlf -eofchar h
+ set out [open $path(test2) w]
+ fcopy $in $out
+ close $in
+ close $out
+ file size $path(test2)
+} 8
+
test io-53.1 {CopyData} {fcopy} {
file delete $path(test1)
set f1 [open $thisScript]