From 7460f22cc7e783b1dd480c2fbf8ef6fc90a0360c Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 21 Jan 2014 21:32:53 +0000 Subject: Backport of bytearray append machinery to support bug fixes in ReadBytes. --- generic/tclBinary.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++ generic/tclIO.c | 17 +++------- generic/tclInt.h | 2 ++ 3 files changed, 98 insertions(+), 13 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index dbb296b..68289f2 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -549,6 +549,98 @@ UpdateStringOfByteArray( /* *---------------------------------------------------------------------- * + * TclAppendBytesToByteArray -- + * + * This function appends an array of bytes to a byte array object. Note + * that the object *must* be unshared, and the array of bytes *must not* + * refer to the object being appended to. + * + * Results: + * None. + * + * Side effects: + * Allocates enough memory for an array of bytes of the requested total + * size, or possibly larger. [Bug 2992970] + * + *---------------------------------------------------------------------- + */ + +#define TCL_MIN_GROWTH 1024 +void +TclAppendBytesToByteArray( + Tcl_Obj *objPtr, + const unsigned char *bytes, + int len) +{ + ByteArray *byteArrayPtr; + int needed; + + if (Tcl_IsShared(objPtr)) { + Tcl_Panic("%s called with shared object","TclAppendBytesToByteArray"); + } + if (len < 0) { + Tcl_Panic("%s must be called with definite number of bytes to append", + "TclAppendBytesToByteArray"); + } + if (len == 0) { + /* Append zero bytes is a no-op. */ + return; + } + if (objPtr->typePtr != &tclByteArrayType) { + SetByteArrayFromAny(NULL, objPtr); + } + byteArrayPtr = GET_BYTEARRAY(objPtr); + + if (len > INT_MAX - byteArrayPtr->used) { + Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); + } + + needed = byteArrayPtr->used + len; + /* + * If we need to, resize the allocated space in the byte array. + */ + + if (needed > byteArrayPtr->allocated) { + ByteArray *ptr = NULL; + int attempt; + + if (needed <= INT_MAX/2) { + /* Try to allocate double the total space that is needed. */ + attempt = 2 * needed; + ptr = (ByteArray *) attemptckrealloc((void *) byteArrayPtr, + BYTEARRAY_SIZE(attempt)); + } + if (ptr == NULL) { + /* Try to allocate double the increment that is needed (plus). */ + unsigned int limit = INT_MAX - needed; + unsigned int extra = len + TCL_MIN_GROWTH; + int growth = (int) ((extra > limit) ? limit : extra); + + attempt = needed + growth; + ptr = (ByteArray *) attemptckrealloc((void *) byteArrayPtr, + BYTEARRAY_SIZE(attempt)); + } + if (ptr == NULL) { + /* Last chance: Try to allocate exactly what is needed. */ + attempt = needed; + ptr = (ByteArray *) ckrealloc((void *)byteArrayPtr, + BYTEARRAY_SIZE(attempt)); + } + byteArrayPtr = ptr; + byteArrayPtr->allocated = attempt; + SET_BYTEARRAY(objPtr, byteArrayPtr); + } + + if (bytes) { + memcpy(byteArrayPtr->bytes + byteArrayPtr->used, bytes, len); + } + byteArrayPtr->used += len; + TclInvalidateStringRep(objPtr); +} + +/* + *---------------------------------------------------------------------- + * * Tcl_BinaryObjCmd -- * * This procedure implements the "binary" Tcl command. diff --git a/generic/tclIO.c b/generic/tclIO.c index f1d8909..c1b7ee9 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5586,20 +5586,8 @@ ReadBytes( toRead = srcLen; } + TclAppendBytesToByteArray(objPtr, NULL, toRead); dst = (char *) Tcl_GetByteArrayFromObj(objPtr, &length); - if (toRead > length - offset - 1) { - /* - * Double the existing size of the object or make enough room to hold - * all the characters we may get from the source buffer, whichever is - * larger. - */ - - length = offset * 2; - if (offset < toRead) { - length = offset + toRead + 1; - } - dst = (char *) Tcl_SetByteArrayLength(objPtr, length); - } dst += offset; if (statePtr->flags & INPUT_NEED_NL) { @@ -5607,6 +5595,7 @@ ReadBytes( if ((srcLen == 0) || (*src != '\n')) { *dst = '\r'; *offsetPtr += 1; + Tcl_SetByteArrayLength(objPtr, *offsetPtr); return 1; } *dst++ = '\n'; @@ -5619,11 +5608,13 @@ ReadBytes( dstWrote = toRead; if (TranslateInputEOL(statePtr, dst, src, &dstWrote, &srcRead) != 0) { if (dstWrote == 0) { + Tcl_SetByteArrayLength(objPtr, *offsetPtr); return -1; } } bufPtr->nextRemoved += srcRead; *offsetPtr += dstWrote; + Tcl_SetByteArrayLength(objPtr, *offsetPtr); return dstWrote; } diff --git a/generic/tclInt.h b/generic/tclInt.h index dc28b97..64d39a0 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2477,6 +2477,8 @@ MODULE_SCOPE char tclEmptyString; *---------------------------------------------------------------- */ +MODULE_SCOPE void TclAppendBytesToByteArray(Tcl_Obj *objPtr, + const unsigned char *bytes, int len); MODULE_SCOPE void TclAdvanceContinuations(int* line, int** next, int loc); MODULE_SCOPE void TclAdvanceLines(int *line, const char *start, const char *end); -- cgit v0.12 From abe23bfb4ef65eb899170e5ae7c4efc030294b31 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 21 Jan 2014 22:08:16 +0000 Subject: There is no need for ReadBytes() or its caller(s) to track how many bytes are actually stored in objPtr. The ByteArray Tcl_ObjType already has the machinery to take care of this. --- generic/tclIO.c | 47 ++++++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index c1b7ee9..4a5d8f1 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -208,7 +208,7 @@ static int HaveVersion(const Tcl_ChannelType *typePtr, static void PeekAhead(Channel *chanPtr, char **dstEndPtr, GetsState *gsPtr); static int ReadBytes(ChannelState *statePtr, Tcl_Obj *objPtr, - int charsLeft, int *offsetPtr); + int charsLeft); static int ReadChars(ChannelState *statePtr, Tcl_Obj *objPtr, int charsLeft, int *offsetPtr, int *factorPtr); static void RecycleBuffer(ChannelState *statePtr, @@ -5448,12 +5448,10 @@ DoReadChars( */ TclGetString(objPtr); + offset = 0; } - offset = 0; } else { - if (encoding == NULL) { - Tcl_GetByteArrayFromObj(objPtr, &offset); - } else { + if (encoding) { TclGetStringFromObj(objPtr, &offset); } } @@ -5462,7 +5460,7 @@ DoReadChars( copiedNow = -1; if (statePtr->inQueueHead != NULL) { if (encoding == NULL) { - copiedNow = ReadBytes(statePtr, objPtr, toRead, &offset); + copiedNow = ReadBytes(statePtr, objPtr, toRead); } else { copiedNow = ReadChars(statePtr, objPtr, toRead, &offset, &factor); @@ -5510,9 +5508,7 @@ DoReadChars( } ResetFlag(statePtr, CHANNEL_BLOCKED); - if (encoding == NULL) { - Tcl_SetByteArrayLength(objPtr, offset); - } else { + if (encoding) { Tcl_SetObjLength(objPtr, offset); } @@ -5540,13 +5536,11 @@ DoReadChars( * allocated to hold data read from the channel as needed. * * Results: - * The return value is the number of bytes appended to the object and - * *offsetPtr is filled with the total number of bytes in the object - * (greater than the return value if there were already bytes in the - * object). + * The return value is the number of bytes appended to the object, or + * -1 to indicate that zero bytes were read due to an EOF. * * Side effects: - * None. + * The storage of bytes in objPtr can cause (re-)allocation of memory. * *--------------------------------------------------------------------------- */ @@ -5559,24 +5553,18 @@ ReadBytes( * been allocated to hold data, not how many * bytes of data have been stored in the * object. */ - int bytesToRead, /* Maximum number of bytes to store, or < 0 to + int bytesToRead) /* Maximum number of bytes to store, or < 0 to * get all available bytes. Bytes are obtained * from the first buffer in the queue - even * if this number is larger than the number of * bytes available in the first buffer, only * the bytes from the first buffer are * returned. */ - int *offsetPtr) /* On input, contains how many bytes of objPtr - * have been used to hold data. On output, - * filled with how many bytes are now being - * used. */ { - int toRead, srcLen, offset, length, srcRead, dstWrote; + int toRead, srcLen, length, srcRead, dstWrote; ChannelBuffer *bufPtr; char *src, *dst; - offset = *offsetPtr; - bufPtr = statePtr->inQueueHead; src = RemovePoint(bufPtr); srcLen = BytesLeft(bufPtr); @@ -5586,16 +5574,17 @@ ReadBytes( toRead = srcLen; } + (void) Tcl_GetByteArrayFromObj(objPtr, &length); TclAppendBytesToByteArray(objPtr, NULL, toRead); - dst = (char *) Tcl_GetByteArrayFromObj(objPtr, &length); - dst += offset; + dst = (char *) Tcl_GetByteArrayFromObj(objPtr, NULL); + dst += length; if (statePtr->flags & INPUT_NEED_NL) { ResetFlag(statePtr, INPUT_NEED_NL); if ((srcLen == 0) || (*src != '\n')) { *dst = '\r'; - *offsetPtr += 1; - Tcl_SetByteArrayLength(objPtr, *offsetPtr); + length += 1; + Tcl_SetByteArrayLength(objPtr, length); return 1; } *dst++ = '\n'; @@ -5608,13 +5597,13 @@ ReadBytes( dstWrote = toRead; if (TranslateInputEOL(statePtr, dst, src, &dstWrote, &srcRead) != 0) { if (dstWrote == 0) { - Tcl_SetByteArrayLength(objPtr, *offsetPtr); + Tcl_SetByteArrayLength(objPtr, length); return -1; } } bufPtr->nextRemoved += srcRead; - *offsetPtr += dstWrote; - Tcl_SetByteArrayLength(objPtr, *offsetPtr); + length += dstWrote; + Tcl_SetByteArrayLength(objPtr, length); return dstWrote; } -- cgit v0.12 From 8703cd164100a81207d646099206dcc3acdf05bb Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 27 Jan 2014 17:35:27 +0000 Subject: Revise the Tcl_Append* machinery to tolerate NULL bytes to append. Then have ReadChars() use that machinery to resize buffer receiving input, rather than invent its own version. Simplify ReadChars() callers. --- generic/tclIO.c | 67 +++++++++++--------------------------------------- generic/tclStringObj.c | 29 ++++++++++++++-------- 2 files changed, 34 insertions(+), 62 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 972cbd8..40573d7 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -210,7 +210,7 @@ static void PeekAhead(Channel *chanPtr, char **dstEndPtr, static int ReadBytes(ChannelState *statePtr, Tcl_Obj *objPtr, int charsLeft); static int ReadChars(ChannelState *statePtr, Tcl_Obj *objPtr, - int charsLeft, int *offsetPtr, int *factorPtr); + int charsLeft, int *factorPtr); static void RecycleBuffer(ChannelState *statePtr, ChannelBuffer *bufPtr, int mustDiscard); static int StackSetBlockMode(Channel *chanPtr, int mode); @@ -5425,7 +5425,7 @@ DoReadChars( ChannelState *statePtr = chanPtr->state; /* State info for channel */ ChannelBuffer *bufPtr; - int offset, factor, copied, copiedNow, result; + int factor, copied, copiedNow, result; Tcl_Encoding encoding; #define UTF_EXPANSION_FACTOR 1024 @@ -5447,14 +5447,11 @@ DoReadChars( * We're going to access objPtr->bytes directly, so we must ensure * that this is actually a string object (otherwise it might have * been pure Unicode). + * + * Probably not needed anymore. */ TclGetString(objPtr); - offset = 0; - } - } else { - if (encoding) { - TclGetStringFromObj(objPtr, &offset); } } @@ -5464,8 +5461,7 @@ DoReadChars( if (encoding == NULL) { copiedNow = ReadBytes(statePtr, objPtr, toRead); } else { - copiedNow = ReadChars(statePtr, objPtr, toRead, &offset, - &factor); + copiedNow = ReadChars(statePtr, objPtr, toRead, &factor); } /* @@ -5510,9 +5506,6 @@ DoReadChars( } ResetFlag(statePtr, CHANNEL_BLOCKED); - if (encoding) { - Tcl_SetObjLength(objPtr, offset); - } /* * Update the notifier state so we don't block while there is still data @@ -5651,17 +5644,13 @@ ReadChars( * available in the first buffer, only the * characters from the first buffer are * returned. */ - int *offsetPtr, /* On input, contains how many bytes of objPtr - * have been used to hold data. On output, - * filled with how many bytes are now being - * used. */ int *factorPtr) /* On input, contains a guess of how many * bytes need to be allocated to hold the * result of converting N source bytes to * UTF-8. On output, contains another guess * based on the data seen so far. */ { - int toRead, factor, offset, spaceLeft, srcLen, dstNeeded; + int toRead, factor, srcLen, dstNeeded, numBytes; int srcRead, dstWrote, numChars, dstRead; ChannelBuffer *bufPtr; char *src, *dst; @@ -5669,7 +5658,6 @@ ReadChars( int encEndFlagSuppressed = 0; factor = *factorPtr; - offset = *offsetPtr; bufPtr = statePtr->inQueueHead; src = RemovePoint(bufPtr); @@ -5687,37 +5675,9 @@ ReadChars( */ dstNeeded = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR; - spaceLeft = objPtr->length - offset; - - if (dstNeeded > spaceLeft) { - /* - * Double the existing size of the object or make enough room to hold - * all the characters we want from the source buffer, whichever is - * larger. - */ - - int length = offset + ((offset < dstNeeded) ? dstNeeded : offset); - - if (Tcl_AttemptSetObjLength(objPtr, length) == 0) { - length = offset + dstNeeded; - if (Tcl_AttemptSetObjLength(objPtr, length) == 0) { - dstNeeded = TCL_UTF_MAX - 1 + toRead; - length = offset + dstNeeded; - Tcl_SetObjLength(objPtr, length); - } - } - spaceLeft = length - offset; - } - if (toRead == srcLen) { - /* - * Want to convert the whole buffer in one pass. If we have enough - * space, convert it using all available space in object rather than - * using the factor. - */ - - dstNeeded = spaceLeft; - } - dst = objPtr->bytes + offset; + (void) TclGetStringFromObj(objPtr, &numBytes); + Tcl_AppendToObj(objPtr, NULL, dstNeeded); + dst = TclGetString(objPtr) + numBytes; /* * [Bug 1462248]: The cause of the crash reported in this bug is this: @@ -5788,7 +5748,7 @@ ReadChars( *dst = '\r'; } statePtr->inputEncodingFlags &= ~TCL_ENCODING_START; - *offsetPtr += 1; + Tcl_SetObjLength(objPtr, numBytes + 1); if (encEndFlagSuppressed) { statePtr->inputEncodingFlags |= TCL_ENCODING_END; @@ -5829,6 +5789,7 @@ ReadChars( SetFlag(statePtr, CHANNEL_NEED_MORE_DATA); } + Tcl_SetObjLength(objPtr, numBytes); return -1; } @@ -5853,7 +5814,8 @@ ReadChars( memcpy(RemovePoint(nextPtr), src, (size_t) srcLen); RecycleBuffer(statePtr, bufPtr, 0); statePtr->inQueueHead = nextPtr; - return ReadChars(statePtr, objPtr, charsToRead, offsetPtr, factorPtr); + Tcl_SetObjLength(objPtr, numBytes); + return ReadChars(statePtr, objPtr, charsToRead, factorPtr); } dstRead = dstWrote; @@ -5866,6 +5828,7 @@ ReadChars( */ if (dstWrote == 0) { + Tcl_SetObjLength(objPtr, numBytes); return -1; } statePtr->inputEncodingState = oldState; @@ -5905,7 +5868,7 @@ ReadChars( if (dstWrote > srcRead + 1) { *factorPtr = dstWrote * UTF_EXPANSION_FACTOR / srcRead; } - *offsetPtr += dstWrote; + Tcl_SetObjLength(objPtr, numBytes + dstWrote); return numChars; } diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index a929d04..d96d814 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1139,7 +1139,8 @@ Tcl_AppendLimitedToObj( if (ellipsis == NULL) { ellipsis = "..."; } - toCopy = Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes; + toCopy = (bytes == NULL) ? limit + : Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes; } /* @@ -1386,7 +1387,8 @@ AppendUnicodeToUnicodeRep( * due to the reallocs below. */ int offset = -1; - if (unicode >= stringPtr->unicode && unicode <= stringPtr->unicode + if (unicode && unicode >= stringPtr->unicode + && unicode <= stringPtr->unicode + stringPtr->uallocated / sizeof(Tcl_UniChar)) { offset = unicode - stringPtr->unicode; } @@ -1405,8 +1407,10 @@ AppendUnicodeToUnicodeRep( * trailing null. */ - memcpy(stringPtr->unicode + stringPtr->numChars, unicode, - appendNumChars * sizeof(Tcl_UniChar)); + if (unicode) { + memcpy(stringPtr->unicode + stringPtr->numChars, unicode, + appendNumChars * sizeof(Tcl_UniChar)); + } stringPtr->unicode[numChars] = 0; stringPtr->numChars = numChars; stringPtr->allocated = 0; @@ -1478,8 +1482,8 @@ AppendUtfToUnicodeRep( int numBytes) /* Number of bytes of "bytes" to convert. */ { Tcl_DString dsPtr; - int numChars; - Tcl_UniChar *unicode; + int numChars = numBytes; + Tcl_UniChar *unicode = NULL; if (numBytes < 0) { numBytes = (bytes ? strlen(bytes) : 0); @@ -1489,8 +1493,11 @@ AppendUtfToUnicodeRep( } Tcl_DStringInit(&dsPtr); - numChars = Tcl_NumUtfChars(bytes, numBytes); - unicode = (Tcl_UniChar *)Tcl_UtfToUniCharDString(bytes, numBytes, &dsPtr); + if (bytes) { + numChars = Tcl_NumUtfChars(bytes, numBytes); + unicode = (Tcl_UniChar *) Tcl_UtfToUniCharDString(bytes, numBytes, + &dsPtr); + } AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); Tcl_DStringFree(&dsPtr); } @@ -1547,7 +1554,7 @@ AppendUtfToUtfRep( * due to the reallocs below. */ int offset = -1; - if (bytes >= objPtr->bytes + if (bytes && bytes >= objPtr->bytes && bytes <= objPtr->bytes + objPtr->length) { offset = bytes - objPtr->bytes; } @@ -1585,7 +1592,9 @@ AppendUtfToUtfRep( stringPtr->numChars = -1; stringPtr->hasUnicode = 0; - memcpy(objPtr->bytes + oldLength, bytes, (size_t) numBytes); + if (bytes) { + memcpy(objPtr->bytes + oldLength, bytes, (size_t) numBytes); + } objPtr->bytes[newLength] = 0; objPtr->length = newLength; } -- cgit v0.12 From 11b2556b272a74d9456a2b0b9cef5ccc76fd8316 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 6 Feb 2014 19:04:36 +0000 Subject: Revised ReadChars to restore an attempt to make sure we do not short read because of a false notion of limited storage space. The test suite does not appear to demonstrate any case where this matters. Could be an incomplete test suite, or an example of pointless code. --- generic/tclIO.c | 8 +++++++- generic/tclInt.h | 2 ++ generic/tclStringObj.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index f8baba3..cedf3f6 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5310,7 +5310,13 @@ ReadChars( dstNeeded = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR; (void) TclGetStringFromObj(objPtr, &numBytes); Tcl_AppendToObj(objPtr, NULL, dstNeeded); - dst = TclGetString(objPtr) + numBytes; + if (toRead == srcLen) { + unsigned int size; + dst = TclGetStringStorage(objPtr, &size) + numBytes; + dstNeeded = size - numBytes; + } else { + dst = TclGetString(objPtr) + numBytes; + } /* * [Bug 1462248]: The cause of the crash reported in this bug is this: diff --git a/generic/tclInt.h b/generic/tclInt.h index a998460..0c09ec0 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2571,6 +2571,8 @@ MODULE_SCOPE int TclGetOpenModeEx(Tcl_Interp *interp, int *binaryPtr); MODULE_SCOPE Tcl_Obj * TclGetProcessGlobalValue(ProcessGlobalValue *pgvPtr); MODULE_SCOPE const char *TclGetSrcInfoForCmd(Interp *iPtr, int *lenPtr); +MODULE_SCOPE char * TclGetStringStorage(Tcl_Obj *objPtr, + unsigned int *sizePtr); MODULE_SCOPE int TclGlob(Tcl_Interp *interp, char *pattern, Tcl_Obj *unquotedPrefix, int globFlags, Tcl_GlobTypeData *types); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index d96d814..8c6a376 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2711,6 +2711,38 @@ Tcl_ObjPrintf( /* *--------------------------------------------------------------------------- * + * TclGetStringStorage -- + * + * Returns the string storage space of a Tcl_Obj. + * + * Results: + * The pointer value objPtr->bytes is returned and the number of bytes + * allocated there is written to *sizePtr (if known). + * + * Side effects: + * May set objPtr->bytes. + * + *--------------------------------------------------------------------------- + */ + +char * +TclGetStringStorage( + Tcl_Obj *objPtr, + unsigned int *sizePtr) +{ + String *stringPtr; + + if (objPtr->typePtr != &tclStringType || objPtr->bytes == NULL) { + return TclGetStringFromObj(objPtr, (int *)sizePtr); + } + + stringPtr = GET_STRING(objPtr); + *sizePtr = stringPtr->allocated; + return objPtr->bytes; +} +/* + *--------------------------------------------------------------------------- + * * TclStringObjReverse -- * * Implements the [string reverse] operation. -- cgit v0.12 From 08700ad2348944e47107ddbcbf18bbd7d861668d Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 11 Feb 2014 19:53:41 +0000 Subject: Refactor so that CopyAndTranslateBuffer() calls on TranslateInputEOL() instead of duplicating so much of its function. Note the testing gaps. --- generic/tclIO.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index cedf3f6..09b8191 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8805,8 +8805,6 @@ CopyAndTranslateBuffer( * in the current input buffer? */ int copied; /* How many characters were already copied * into the destination space? */ - int i; /* Iterates over the copied input looking for - * the input eofChar. */ /* * If there is no input at all, return zero. The invariant is that either @@ -8821,6 +8819,15 @@ CopyAndTranslateBuffer( bufPtr = statePtr->inQueueHead; bytesInBuffer = BytesLeft(bufPtr); +#if 1 + copied = space; + if (bytesInBuffer <= copied) { + copied = bytesInBuffer; + } + TranslateInputEOL(statePtr, result, RemovePoint(bufPtr), + &copied, &bytesInBuffer); + bufPtr->nextRemoved += copied; +#else copied = 0; switch (statePtr->inputTranslation) { case TCL_TRANSLATE_LF: @@ -8842,6 +8849,7 @@ CopyAndTranslateBuffer( case TCL_TRANSLATE_CR: { char *end; + Tcl_Panic("Untested"); if (bytesInBuffer == 0) { return 0; } @@ -8873,6 +8881,7 @@ CopyAndTranslateBuffer( * If there is a held-back "\r" at EOF, produce it now. */ + Tcl_Panic("Untested"); if (bytesInBuffer == 0) { if ((statePtr->flags & (INPUT_SAW_CR | CHANNEL_EOF)) == (INPUT_SAW_CR | CHANNEL_EOF)) { @@ -8940,6 +8949,7 @@ CopyAndTranslateBuffer( for (src = result; src < end; src++) { curByte = *src; if (curByte == '\r') { + Tcl_Panic("Untested"); SetFlag(statePtr, INPUT_SAW_CR); *dst = '\n'; dst++; @@ -8965,6 +8975,9 @@ CopyAndTranslateBuffer( */ if (statePtr->inEofChar != 0) { + int i; + + Tcl_Panic("Untested"); for (i = 0; i < copied; i++) { if (result[i] == (char) statePtr->inEofChar) { /* @@ -8979,6 +8992,7 @@ CopyAndTranslateBuffer( } } } +#endif /* * If the current buffer is empty recycle it. -- cgit v0.12 From abe90f6c1b82d92b9e000f861edde447cf1d7863 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 18 Feb 2014 17:54:06 +0000 Subject: Coverage test for -translation auto handling of INPUT_SAW_CR flag. Demonstrates refactor failure. --- generic/tclIO.c | 1 - tests/io.test | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 09b8191..20101c2 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8949,7 +8949,6 @@ CopyAndTranslateBuffer( for (src = result; src < end; src++) { curByte = *src; if (curByte == '\r') { - Tcl_Panic("Untested"); SetFlag(statePtr, INPUT_SAW_CR); *dst = '\n'; dst++; diff --git a/tests/io.test b/tests/io.test index 68051d7..e08c57a 100644 --- a/tests/io.test +++ b/tests/io.test @@ -6730,6 +6730,21 @@ test io-52.11 {TclCopyChannel & encodings} {fcopy} { file size $path(kyrillic.txt) } 3 +test io-52.12 {coverage of -translation auto} { + file delete $path(test1) $path(test2) + set out [open $path(test1) wb] + chan configure $out -translation lf + puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz + close $out + set in [open $path(test1)] + chan configure $in -buffersize 8 + set out [open $path(test2) w] + fcopy $in $out + close $in + close $out + file size $path(test2) +} 29 + test io-53.1 {CopyData} {fcopy} { file delete $path(test1) set f1 [open $thisScript] -- cgit v0.12 From e3b160fb968cfca1ba3255292e5583bd0bf3e37d Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 18 Feb 2014 18:26:22 +0000 Subject: Refactor correction exposed by coverage test. --- generic/tclIO.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 20101c2..01af6dc 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8826,7 +8826,7 @@ CopyAndTranslateBuffer( } TranslateInputEOL(statePtr, result, RemovePoint(bufPtr), &copied, &bytesInBuffer); - bufPtr->nextRemoved += copied; + bufPtr->nextRemoved += bytesInBuffer; #else copied = 0; switch (statePtr->inputTranslation) { -- cgit v0.12 From 997ad71bccf25cf78178d99b5bd94103ef365e4d Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 18 Feb 2014 18:35:19 +0000 Subject: coverage test for -translation cr --- generic/tclIO.c | 1 - tests/io.test | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 01af6dc..4197dc0 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8849,7 +8849,6 @@ CopyAndTranslateBuffer( case TCL_TRANSLATE_CR: { char *end; - Tcl_Panic("Untested"); if (bytesInBuffer == 0) { return 0; } diff --git a/tests/io.test b/tests/io.test index e08c57a..0c2944b 100644 --- a/tests/io.test +++ b/tests/io.test @@ -6744,6 +6744,20 @@ test io-52.12 {coverage of -translation auto} { close $out file size $path(test2) } 29 +test io-52.13 {coverage of -translation cr} { + file delete $path(test1) $path(test2) + set out [open $path(test1) wb] + chan configure $out -translation lf + puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz + close $out + set in [open $path(test1)] + chan configure $in -buffersize 8 -translation cr + set out [open $path(test2) w] + fcopy $in $out + close $in + close $out + file size $path(test2) +} 30 test io-53.1 {CopyData} {fcopy} { file delete $path(test1) -- cgit v0.12 From 9afa8a13e86fbd71a030ead7909cbe7d7db76296 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 18 Feb 2014 21:27:35 +0000 Subject: Another coverage test that reveals refactoring error. --- generic/tclIO.c | 2 +- tests/io.test | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 4197dc0..c862923 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8880,10 +8880,10 @@ CopyAndTranslateBuffer( * If there is a held-back "\r" at EOF, produce it now. */ - Tcl_Panic("Untested"); if (bytesInBuffer == 0) { if ((statePtr->flags & (INPUT_SAW_CR | CHANNEL_EOF)) == (INPUT_SAW_CR | CHANNEL_EOF)) { + Tcl_Panic("Untested"); result[0] = '\r'; ResetFlag(statePtr, INPUT_SAW_CR); return 1; diff --git a/tests/io.test b/tests/io.test index 0c2944b..4df44a3 100644 --- a/tests/io.test +++ b/tests/io.test @@ -6758,6 +6758,20 @@ test io-52.13 {coverage of -translation cr} { close $out file size $path(test2) } 30 +test io-52.14 {coverage of -translation crlf} { + file delete $path(test1) $path(test2) + set out [open $path(test1) wb] + chan configure $out -translation lf + puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz + close $out + set in [open $path(test1)] + chan configure $in -buffersize 8 -translation crlf + set out [open $path(test2) w] + fcopy $in $out + close $in + close $out + file size $path(test2) +} 29 test io-53.1 {CopyData} {fcopy} { file delete $path(test1) -- cgit v0.12 From 1a35a544342c26a5fa207edcd05448d6f525d9a1 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 18 Feb 2014 23:20:06 +0000 Subject: Callers of TranslateInputEOL are expected to manage the INPUT_NEED_NL flag. --- generic/tclIO.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/generic/tclIO.c b/generic/tclIO.c index c862923..68d370a 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8824,6 +8824,20 @@ CopyAndTranslateBuffer( if (bytesInBuffer <= copied) { copied = bytesInBuffer; } + if (copied == 0) { + return 0; + } + if (statePtr->flags & INPUT_NEED_NL) { + ResetFlag(statePtr, INPUT_NEED_NL); + + if (RemovePoint(bufPtr)[0] == '\n') { + bufPtr->nextRemoved++; + *result = '\n'; + } else { + *result = '\r'; + } + return 1; + } TranslateInputEOL(statePtr, result, RemovePoint(bufPtr), &copied, &bytesInBuffer); bufPtr->nextRemoved += bytesInBuffer; -- cgit v0.12 From 03c5b98228950023fde73077aa1b3e401e373d1c Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 19 Feb 2014 03:36:15 +0000 Subject: Shortcut ReadBytes() when it's a no-op. --- generic/tclIO.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 68d370a..7820242 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5201,6 +5201,9 @@ ReadBytes( if ((unsigned) toRead > (unsigned) srcLen) { toRead = srcLen; } + if (toRead == 0) { + return 0; + } (void) Tcl_GetByteArrayFromObj(objPtr, &length); TclAppendBytesToByteArray(objPtr, NULL, toRead); @@ -5209,7 +5212,7 @@ ReadBytes( if (statePtr->flags & INPUT_NEED_NL) { ResetFlag(statePtr, INPUT_NEED_NL); - if ((srcLen == 0) || (*src != '\n')) { + if (*src != '\n') { *dst = '\r'; length += 1; Tcl_SetByteArrayLength(objPtr, length); -- cgit v0.12 From e9a5cb0bbfec8877ccb5b56d39e6100ba9c5e42d Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 19 Feb 2014 03:45:17 +0000 Subject: Next coverage test to expose another refactoring error. --- generic/tclIO.c | 1 - tests/io.test | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 7820242..3b2b53e 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8900,7 +8900,6 @@ CopyAndTranslateBuffer( if (bytesInBuffer == 0) { if ((statePtr->flags & (INPUT_SAW_CR | CHANNEL_EOF)) == (INPUT_SAW_CR | CHANNEL_EOF)) { - Tcl_Panic("Untested"); result[0] = '\r'; ResetFlag(statePtr, INPUT_SAW_CR); return 1; diff --git a/tests/io.test b/tests/io.test index 4df44a3..8c066ca 100644 --- a/tests/io.test +++ b/tests/io.test @@ -6772,6 +6772,20 @@ test io-52.14 {coverage of -translation crlf} { close $out file size $path(test2) } 29 +test io-52.15 {coverage of -translation crlf} { + file delete $path(test1) $path(test2) + set out [open $path(test1) wb] + chan configure $out -translation lf + puts -nonewline $out abcdefg\r + close $out + set in [open $path(test1)] + chan configure $in -buffersize 8 -translation crlf + set out [open $path(test2) w] + fcopy $in $out + close $in + close $out + file size $path(test2) +} 8 test io-53.1 {CopyData} {fcopy} { file delete $path(test1) -- cgit v0.12 From d0f15c03d3f5385a24eea5b7b2cdc6f8d95a8933 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 20 Feb 2014 03:51:16 +0000 Subject: Refactoring repair to fix failing test. --- generic/tclIO.c | 52 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 3b2b53e..3cddc29 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8808,6 +8808,7 @@ CopyAndTranslateBuffer( * in the current input buffer? */ int copied; /* How many characters were already copied * into the destination space? */ + int toCopy; /* * If there is no input at all, return zero. The invariant is that either @@ -8822,30 +8823,51 @@ CopyAndTranslateBuffer( bufPtr = statePtr->inQueueHead; bytesInBuffer = BytesLeft(bufPtr); + copied = 0; #if 1 - copied = space; - if (bytesInBuffer <= copied) { - copied = bytesInBuffer; - } - if (copied == 0) { - return 0; - } if (statePtr->flags & INPUT_NEED_NL) { - ResetFlag(statePtr, INPUT_NEED_NL); - if (RemovePoint(bufPtr)[0] == '\n') { - bufPtr->nextRemoved++; - *result = '\n'; - } else { + /* + * An earlier call to TranslateInputEOL ended in the read of a \r . + * Only the next read from the same channel can complete the + * translation sequence to tell us what character we should read. + */ + + if (bytesInBuffer) { + /* There's a next byte. It will settle things. */ + ResetFlag(statePtr, INPUT_NEED_NL); + + if (RemovePoint(bufPtr)[0] == '\n') { + bufPtr->nextRemoved++; + bytesInBuffer--; + *result++ = '\n'; + } else { + *result++ = '\r'; + } + copied++; + space--; + } else if (statePtr->flags & CHANNEL_EOF) { + /* There is no next byte, and there never will be (EOF). */ + ResetFlag(statePtr, INPUT_NEED_NL); *result = '\r'; + return 1; + } else { + /* There is no next byte. Ask the caller to read more. */ + return 0; } - return 1; + } + toCopy = space; + if (bytesInBuffer <= toCopy) { + toCopy = bytesInBuffer; + } + if (toCopy == 0) { + return copied; } TranslateInputEOL(statePtr, result, RemovePoint(bufPtr), - &copied, &bytesInBuffer); + &toCopy, &bytesInBuffer); bufPtr->nextRemoved += bytesInBuffer; + copied += toCopy; #else - copied = 0; switch (statePtr->inputTranslation) { case TCL_TRANSLATE_LF: if (bytesInBuffer == 0) { -- cgit v0.12 From ddaf1d27cb4ec6db78294cb42e1fd46ae6d2dbc2 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 20 Feb 2014 20:31:55 +0000 Subject: Can we send some binary reads down the char-reading path? --- generic/tclIO.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 3636861..4d7133a 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5096,7 +5096,8 @@ DoReadChars( for (copied = 0; (unsigned) toRead > 0; ) { copiedNow = -1; if (statePtr->inQueueHead != NULL) { - if (encoding == NULL) { + if (encoding == NULL + && statePtr->inputTranslation == TCL_TRANSLATE_LF) { copiedNow = ReadBytes(statePtr, objPtr, toRead, &offset); } else { copiedNow = ReadChars(statePtr, objPtr, toRead, &offset, @@ -5320,6 +5321,8 @@ ReadChars( char *src, *dst; Tcl_EncodingState oldState; int encEndFlagSuppressed = 0; + Tcl_Encoding encoding = statePtr->encoding? statePtr->encoding + : GetBinaryEncoding(); factor = *factorPtr; offset = *offsetPtr; @@ -5424,7 +5427,7 @@ ReadChars( */ ResetFlag(statePtr, INPUT_NEED_NL); - Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, + Tcl_ExternalToUtf(NULL, encoding, src, srcLen, statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst, TCL_UTF_MAX + 1, &srcRead, &dstWrote, &numChars); if ((dstWrote > 0) && (*dst == '\n')) { @@ -5449,7 +5452,7 @@ ReadChars( return 1; } - Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, + Tcl_ExternalToUtf(NULL, encoding, src, srcLen, statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst, dstNeeded + 1, &srcRead, &dstWrote, &numChars); @@ -5522,7 +5525,7 @@ ReadChars( return -1; } statePtr->inputEncodingState = oldState; - Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, + Tcl_ExternalToUtf(NULL, encoding, src, srcLen, statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst, dstRead + TCL_UTF_MAX, &srcRead, &dstWrote, &numChars); TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead); @@ -5545,7 +5548,7 @@ ReadChars( eof = Tcl_UtfAtIndex(dst, toRead); statePtr->inputEncodingState = oldState; - Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, + Tcl_ExternalToUtf(NULL, encoding, src, srcLen, statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst, eof - dst + TCL_UTF_MAX, &srcRead, &dstWrote, &numChars); dstRead = dstWrote; -- cgit v0.12 From 9f6aaa68fc35449d224e5a1ea5d53e09ac38e509 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 20 Feb 2014 21:23:33 +0000 Subject: Switch consistently on the narrower def of binary mode. --- generic/tclIO.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 4d7133a..eae063b 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5060,6 +5060,7 @@ DoReadChars( ChannelBuffer *bufPtr; int offset, factor, copied, copiedNow, result; Tcl_Encoding encoding; + int binaryMode; #define UTF_EXPANSION_FACTOR 1024 /* @@ -5070,8 +5071,12 @@ DoReadChars( encoding = statePtr->encoding; factor = UTF_EXPANSION_FACTOR; + binaryMode = (encoding == NULL) + && (statePtr->inputTranslation == TCL_TRANSLATE_LF) + && (statePtr->inEofChar == NULL); + if (appendFlag == 0) { - if (encoding == NULL) { + if (binaryMode) { Tcl_SetByteArrayLength(objPtr, 0); } else { Tcl_SetObjLength(objPtr, 0); @@ -5086,7 +5091,7 @@ DoReadChars( } offset = 0; } else { - if (encoding == NULL) { + if (binaryMode) { Tcl_GetByteArrayFromObj(objPtr, &offset); } else { TclGetStringFromObj(objPtr, &offset); @@ -5096,8 +5101,7 @@ DoReadChars( for (copied = 0; (unsigned) toRead > 0; ) { copiedNow = -1; if (statePtr->inQueueHead != NULL) { - if (encoding == NULL - && statePtr->inputTranslation == TCL_TRANSLATE_LF) { + if (binaryMode) { copiedNow = ReadBytes(statePtr, objPtr, toRead, &offset); } else { copiedNow = ReadChars(statePtr, objPtr, toRead, &offset, @@ -5146,7 +5150,7 @@ DoReadChars( } ResetFlag(statePtr, CHANNEL_BLOCKED); - if (encoding == NULL) { + if (binaryMode) { Tcl_SetByteArrayLength(objPtr, offset); } else { Tcl_SetObjLength(objPtr, offset); -- cgit v0.12 From 82eaf13ae6136c9679b5aeba5c75cd777f2829dd Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 21 Feb 2014 15:02:35 +0000 Subject: fix type error --- generic/tclIO.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index eae063b..ac28ec0 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5073,7 +5073,7 @@ DoReadChars( binaryMode = (encoding == NULL) && (statePtr->inputTranslation == TCL_TRANSLATE_LF) - && (statePtr->inEofChar == NULL); + && (statePtr->inEofChar == '\0'); if (appendFlag == 0) { if (binaryMode) { -- cgit v0.12 From 527d583d939f70450bc8b3db5077dd7d806c7c3e Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 21 Feb 2014 15:12:16 +0000 Subject: Simplify ReadBytes based on new constraints. --- generic/tclIO.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/generic/tclIO.c b/generic/tclIO.c index ac28ec0..23e1fbf 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5242,6 +5242,10 @@ ReadBytes( } dst += offset; +#if 1 + memcpy(dst, src, (size_t) toRead); + srcRead = dstWrote = toRead; +#else if (statePtr->flags & INPUT_NEED_NL) { ResetFlag(statePtr, INPUT_NEED_NL); if ((srcLen == 0) || (*src != '\n')) { @@ -5262,6 +5266,7 @@ ReadBytes( return -1; } } +#endif bufPtr->nextRemoved += srcRead; *offsetPtr += dstWrote; return dstWrote; -- cgit v0.12 From 73bf5da01200e7f7127273188ea24d751eb75ddf Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 24 Feb 2014 21:01:07 +0000 Subject: simplification trims --- generic/tclIO.c | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 5625ff2..1c5fed4 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5194,7 +5194,7 @@ ReadBytes( * the bytes from the first buffer are * returned. */ { - int toRead, srcLen, length, srcRead, dstWrote; + int toRead, srcLen, length; ChannelBuffer *bufPtr; char *src, *dst; @@ -5215,37 +5215,11 @@ ReadBytes( dst = (char *) Tcl_GetByteArrayFromObj(objPtr, NULL); dst += length; -#if 1 memcpy(dst, src, (size_t) toRead); - srcRead = dstWrote = toRead; -#else - if (statePtr->flags & INPUT_NEED_NL) { - ResetFlag(statePtr, INPUT_NEED_NL); - if (*src != '\n') { - *dst = '\r'; - length += 1; - Tcl_SetByteArrayLength(objPtr, length); - return 1; - } - *dst++ = '\n'; - src++; - srcLen--; - toRead--; - } - - srcRead = srcLen; - dstWrote = toRead; - if (TranslateInputEOL(statePtr, dst, src, &dstWrote, &srcRead) != 0) { - if (dstWrote == 0) { - Tcl_SetByteArrayLength(objPtr, length); - return -1; - } - } -#endif - bufPtr->nextRemoved += srcRead; - length += dstWrote; + bufPtr->nextRemoved += toRead; + length += toRead; Tcl_SetByteArrayLength(objPtr, length); - return dstWrote; + return toRead; } /* -- cgit v0.12 From c7f19f76c5362c2918fe01d49808b3246fd84100 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 24 Feb 2014 21:25:08 +0000 Subject: Reduce ReadBytes to simplest expression. --- generic/tclIO.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 1c5fed4..a73f041 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5194,31 +5194,13 @@ ReadBytes( * the bytes from the first buffer are * returned. */ { - int toRead, srcLen, length; - ChannelBuffer *bufPtr; - char *src, *dst; - - bufPtr = statePtr->inQueueHead; - src = RemovePoint(bufPtr); - srcLen = BytesLeft(bufPtr); - - toRead = bytesToRead; - if ((unsigned) toRead > (unsigned) srcLen) { - toRead = srcLen; - } - if (toRead == 0) { - return 0; - } - - (void) Tcl_GetByteArrayFromObj(objPtr, &length); - TclAppendBytesToByteArray(objPtr, NULL, toRead); - dst = (char *) Tcl_GetByteArrayFromObj(objPtr, NULL); - dst += length; + ChannelBuffer *bufPtr = statePtr->inQueueHead; + int srcLen = BytesLeft(bufPtr); + int toRead = bytesToRead>srcLen || bytesToRead<0 ? srcLen : bytesToRead; - memcpy(dst, src, (size_t) toRead); + TclAppendBytesToByteArray(objPtr, (unsigned char *) RemovePoint(bufPtr), + toRead); bufPtr->nextRemoved += toRead; - length += toRead; - Tcl_SetByteArrayLength(objPtr, length); return toRead; } -- cgit v0.12 From e3adf1d9a076bcb2704e4364c50097b49e6348c5 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 26 Feb 2014 11:26:00 +0000 Subject: More coverage tests and bug fixes. --- generic/tclIO.c | 3 +-- tests/io.test | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index a73f041..c2a8cab 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5644,7 +5644,7 @@ TranslateInputEOL( SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; - ResetFlag(statePtr, INPUT_SAW_CR | INPUT_NEED_NL); +// ResetFlag(statePtr, INPUT_SAW_CR | INPUT_NEED_NL); return 1; } @@ -8981,7 +8981,6 @@ CopyAndTranslateBuffer( if (statePtr->inEofChar != 0) { int i; - Tcl_Panic("Untested"); for (i = 0; i < copied; i++) { if (result[i] == (char) statePtr->inEofChar) { /* diff --git a/tests/io.test b/tests/io.test index 8c066ca..6f4877f 100644 --- a/tests/io.test +++ b/tests/io.test @@ -6786,6 +6786,62 @@ test io-52.15 {coverage of -translation crlf} { close $out file size $path(test2) } 8 +test io-52.16 {coverage of eofChar handling} { + file delete $path(test1) $path(test2) + set out [open $path(test1) wb] + chan configure $out -translation lf + puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz + close $out + set in [open $path(test1)] + chan configure $in -buffersize 8 -translation lf -eofchar a + set out [open $path(test2) w] + fcopy $in $out + close $in + close $out + file size $path(test2) +} 0 +test io-52.17 {coverage of eofChar handling} { + file delete $path(test1) $path(test2) + set out [open $path(test1) wb] + chan configure $out -translation lf + puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz + close $out + set in [open $path(test1)] + chan configure $in -buffersize 8 -translation lf -eofchar d + set out [open $path(test2) w] + fcopy $in $out + close $in + close $out + file size $path(test2) +} 3 +test io-52.18 {coverage of eofChar handling} { + file delete $path(test1) $path(test2) + set out [open $path(test1) wb] + chan configure $out -translation lf + puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz + close $out + set in [open $path(test1)] + chan configure $in -buffersize 8 -translation crlf -eofchar h + set out [open $path(test2) w] + fcopy $in $out + close $in + close $out + file size $path(test2) +} 8 +test io-52.19 {coverage of eofChar handling} { + file delete $path(test1) $path(test2) + set out [open $path(test1) wb] + chan configure $out -translation lf + puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz + close $out + set in [open $path(test1)] + chan configure $in -buffersize 10 -translation crlf -eofchar h + set out [open $path(test2) w] + fcopy $in $out + close $in + close $out + file size $path(test2) +} 8 test io-53.1 {CopyData} {fcopy} { file delete $path(test1) -- cgit v0.12 From aad7393c9adb8f82f2594929954960b91d027032 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 27 Feb 2014 20:11:47 +0000 Subject: remove comment --- generic/tclIO.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index c2a8cab..8d75bf2 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5644,7 +5644,7 @@ TranslateInputEOL( SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; -// ResetFlag(statePtr, INPUT_SAW_CR | INPUT_NEED_NL); + ResetFlag(statePtr, INPUT_SAW_CR | INPUT_NEED_NL); return 1; } -- cgit v0.12 From 3df8548690a047e4fa9a445a253636a3e3a652df Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 27 Feb 2014 20:21:10 +0000 Subject: Work in progress attempting a ReadChars rewrite. --- generic/tclIO.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 3636861..20428b5 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5372,6 +5372,120 @@ ReadChars( } dst = objPtr->bytes + offset; +#if 1 + + /* + * This routine is burdened with satisfying several constraints. + * It cannot append more than 'charsToRead` chars onto objPtr. + * This is measured after encoding and translation transformations + * are completed. There is no precise number of src bytes that can + * be associated with the limit. Yet, when we are done, we must know + * precisely the number of src bytes that were consumed to produce + * the appended chars, so that all subsequent bytes are left in + * the buffers for future read operations. + * + * The consequence is that we have no choice but to implement a + * "trial and error" approach, where in general we may need to + * perform transformations and copies multiple times to achieve + * a consistent set of results. This takes the shape of a loop. + */ + + int dstLimit = dstNeeded + 1; + int savedFlags = statePtr->flags; + int savedIEFlags = statePtr->inputEncodingFlags; + Tcl_EncodingState savedState = statePtr->inputEncodingState; + + while (1) { + int dstDecoded; + + /* + * Perform the encoding transformation. Read no more than + * srcLen bytes, write no more than dstLimit bytes. + */ + +//fprintf(stdout, "Start %d %d\n", dstLimit, srcLen); fflush(stdout); + int code = Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, + statePtr->inputEncodingFlags & (bufPtr->nextPtr + ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState, + dst, dstLimit, &srcRead, &dstDecoded, &numChars); + + /* + * Perform the translation transformation in place. Read no more + * than the dstDecoded bytes the encoding transformation actually + * produced. Capture the number of bytes written in dstWrote. + * Capture the number of bytes actually consumed in dstRead. + */ + +//fprintf(stdout, "Key NS=%d MB=%d S=%d\n", TCL_CONVERT_NOSPACE, +//TCL_CONVERT_MULTIBYTE, TCL_CONVERT_SYNTAX); fflush(stdout); +//fprintf(stdout, "Decoded %d %d\n", dstDecoded,code); fflush(stdout); + dstWrote = dstRead = dstDecoded; + TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead); + + if (dstRead < dstDecoded) { + + /* + * The encoding transformation produced bytes that the + * translation transformation did not consume. Start over + * and impose new limits so that doesn't happen again. + */ +//fprintf(stdout, "X! %d %d\n", dstRead, dstDecoded); fflush(stdout); + + dstLimit = dstRead + TCL_UTF_MAX; + statePtr->flags = savedFlags; + statePtr->inputEncodingFlags = savedIEFlags; + statePtr->inputEncodingState = savedState; + continue; + } + +//fprintf(stdout, "check %d %d %d\n", dstWrote, dstRead, dstDecoded); +//fflush(stdout); + + /* + * The translation transformation can only reduce the number + * of chars when it converts \r\n into \n. The reduction in + * the number of chars is the difference in bytes read and written. + */ + + numChars -= (dstRead - dstWrote); + + if (charsToRead > 0 && numChars > charsToRead) { + + /* + * We read more chars than allowed. Reset limits to + * prevent that and try again. + */ +//fprintf(stdout, "Y!\n"); fflush(stdout); + + dstLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) - dst; + statePtr->flags = savedFlags; + statePtr->inputEncodingFlags = savedIEFlags; + statePtr->inputEncodingState = savedState; + continue; + } + + if (dstWrote == 0) { + +//fprintf(stdout, "Z!\n"); fflush(stdout); + /* + * Could not read anything. Ask caller to get more data. + */ + + return -1; + } + + statePtr->inputEncodingFlags &= ~TCL_ENCODING_START; + + bufPtr->nextRemoved += srcRead; + if (dstWrote > srcRead + 1) { + *factorPtr = dstWrote * UTF_EXPANSION_FACTOR / srcRead; + } + *offsetPtr += dstWrote; +//fprintf(stdout, "OK: %d\n", numChars); fflush(stdout); + return numChars; + } + +#else /* * [Bug 1462248]: The cause of the crash reported in this bug is this: * @@ -5560,6 +5674,7 @@ ReadChars( } *offsetPtr += dstWrote; return numChars; +#endif } /* @@ -5661,7 +5776,9 @@ TranslateInputEOL( if (*src == '\r') { src++; if (src >= srcMax) { - SetFlag(statePtr, INPUT_NEED_NL); +// SetFlag(statePtr, INPUT_NEED_NL); +//fprintf(stdout, "BREAK!\n"); fflush(stdout); +src--; break; } else if (*src == '\n') { *dst++ = *src++; } else { @@ -5673,6 +5790,7 @@ TranslateInputEOL( } srcLen = src - srcStart; dstLen = dst - dstStart; +//fprintf(stdout, "eh? %d %d\n", srcLen, dstLen); fflush(stdout); break; } case TCL_TRANSLATE_AUTO: { -- cgit v0.12 From 607601abc11ec2e965fedc5d3cb1e6d83c3a4a10 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 28 Feb 2014 18:25:20 +0000 Subject: More ReadChars rewriting. Test suite now passes. Note that this reform simplifies ReadChars a fair bit (at least in my eyes). Also it does away with the use of an INPUT_NEED_NL flag, using the same strategy for partial \r\n sequences as is used for incomplete multibyte chars. --- generic/tclIO.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 173 insertions(+), 21 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 20428b5..ec71991 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5403,7 +5403,6 @@ ReadChars( * srcLen bytes, write no more than dstLimit bytes. */ -//fprintf(stdout, "Start %d %d\n", dstLimit, srcLen); fflush(stdout); int code = Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, statePtr->inputEncodingFlags & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState, @@ -5416,9 +5415,6 @@ ReadChars( * Capture the number of bytes actually consumed in dstRead. */ -//fprintf(stdout, "Key NS=%d MB=%d S=%d\n", TCL_CONVERT_NOSPACE, -//TCL_CONVERT_MULTIBYTE, TCL_CONVERT_SYNTAX); fflush(stdout); -//fprintf(stdout, "Decoded %d %d\n", dstDecoded,code); fflush(stdout); dstWrote = dstRead = dstDecoded; TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead); @@ -5426,20 +5422,144 @@ ReadChars( /* * The encoding transformation produced bytes that the - * translation transformation did not consume. Start over - * and impose new limits so that doesn't happen again. + * translation transformation did not consume. Why did + * this happen? */ -//fprintf(stdout, "X! %d %d\n", dstRead, dstDecoded); fflush(stdout); - dstLimit = dstRead + TCL_UTF_MAX; - statePtr->flags = savedFlags; - statePtr->inputEncodingFlags = savedIEFlags; - statePtr->inputEncodingState = savedState; - continue; - } + if (statePtr->inEofChar && dst[dstRead] == statePtr->inEofChar) { + /* + * 1) There's an eof char set on the channel, and + * we saw it and stopped translating at that point. + * + * NOTE the bizarre spec of TranslateInputEOL in this case. + * Clearly the eof char had to be read in order to account + * for the stopping, but the value of dstRead does not + * include it. + * + * Also rather bizarre, our caller can only notice an + * EOF condition if we return the value -1 as the number + * of chars read. This forces us to perform a 2-call + * dance where the first call can read all the chars + * up to the eof char, and the second call is solely + * for consuming the encoded eof char then pointed at + * by src so that we can return that magic -1 value. + * This seems really wasteful, especially since + * the first decoding pass of each call is likely to + * decode many bytes beyond that eof char that's all we + * care about. + */ + + if (dstRead == 0) { + /* + * Curious choice in the eof char handling. We leave + * the eof char in the buffer. So, no need to compute + * a proper srcRead value. At this point, there + * are no chars before the eof char in the buffer. + */ + return -1; + } + + { + /* + * There are chars leading the buffer before the eof + * char. Adjust the dstLimit so we go back and read + * only those and do not encounter the eof char this + * time. + */ + + dstLimit = dstRead + TCL_UTF_MAX; + statePtr->flags = savedFlags; + statePtr->inputEncodingFlags = savedIEFlags; + statePtr->inputEncodingState = savedState; + continue; + } + } + + /* + * 2) The other way to read fewer bytes than are decoded + * is when the final byte is \r and we're in a CRLF + * translation mode so we cannot decide whether to + * record \r or \n yet. + */ + + assert(dstRead + 1 == dstDecoded); + assert(dst[dstRead] == '\r'); + assert(statePtr->inputTranslation == TCL_TRANSLATE_CRLF); + + if (dstWrote > 0) { + /* + * There are chars we can read before we hit the bare cr. + * Go back with a smaller dstLimit so we get them in the + * next pass, compute a matching srcRead, and don't end + * up back here in this call. + */ + + dstLimit = dstRead + TCL_UTF_MAX; + statePtr->flags = savedFlags; + statePtr->inputEncodingFlags = savedIEFlags; + statePtr->inputEncodingState = savedState; + continue; + } + + assert(dstWrote == 0); + assert(dstRead == 0); + assert(dstDecoded == 1); + + /* + * We decoded only the bare cr, and we cannot read a + * translated char from that alone. We have to know what's + * next. So why do we only have the one decoded char? + */ + + if (code != TCL_OK) { + char buffer[TCL_UTF_MAX + 2]; + int read, decoded, count; + + /* + * Didn't get everything the buffer could offer + */ + + statePtr->flags = savedFlags; + statePtr->inputEncodingFlags = savedIEFlags; + statePtr->inputEncodingState = savedState; + + Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, + statePtr->inputEncodingFlags & (bufPtr->nextPtr + ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState, + buffer, TCL_UTF_MAX + 2, &read, &decoded, &count); + + if (count == 2) { + if (buffer[1] == '\n') { + /* \r\n translate to \n */ + dst[0] = '\n'; + bufPtr->nextRemoved += read; + } else { + dst[0] = '\r'; + bufPtr->nextRemoved += srcRead; + } + + dst[1] = '\0'; + statePtr->inputEncodingFlags &= ~TCL_ENCODING_START; + + *offsetPtr += 1; + return 1; + } -//fprintf(stdout, "check %d %d %d\n", dstWrote, dstRead, dstDecoded); -//fflush(stdout); + } else if (statePtr->flags & CHANNEL_EOF) { + + /* + * The bare \r is the only char and we will never read + * a subsequent char to make the determination. + */ + + dst[0] = '\r'; + bufPtr->nextRemoved = bufPtr->nextAdded; + *offsetPtr += 1; + return 1; + } + + /* FALL THROUGH - get more data (dstWrote == 0) */ + } /* * The translation transformation can only reduce the number @@ -5455,7 +5575,6 @@ ReadChars( * We read more chars than allowed. Reset limits to * prevent that and try again. */ -//fprintf(stdout, "Y!\n"); fflush(stdout); dstLimit = Tcl_UtfAtIndex(dst, charsToRead + 1) - dst; statePtr->flags = savedFlags; @@ -5466,12 +5585,46 @@ ReadChars( if (dstWrote == 0) { -//fprintf(stdout, "Z!\n"); fflush(stdout); - /* - * Could not read anything. Ask caller to get more data. + /* + * We were not able to read any chars. Maybe there were + * not enough src bytes to decode into a char. Maybe + * a lone \r could not be translated (crlf mode). Need + * to combine any unused src bytes we have in the first + * buffer with subsequent bytes to try again. */ - return -1; + ChannelBuffer *nextPtr = bufPtr->nextPtr; + + if (nextPtr == NULL) { + if (srcLen > 0) { + SetFlag(statePtr, CHANNEL_NEED_MORE_DATA); + } + return -1; + } + + /* + * Space is made at the beginning of the buffer to copy the + * previous unused bytes there. Check first if the buffer we + * are using actually has enough space at its beginning for + * the data we are copying. Because if not we will write over + * the buffer management information, especially the 'nextPtr'. + * + * Note that the BUFFER_PADDING (See AllocChannelBuffer) is + * used to prevent exactly this situation. I.e. it should never + * happen. Therefore it is ok to panic should it happen despite + * the precautions. + */ + + if (nextPtr->nextRemoved - srcLen < 0) { + Tcl_Panic("Buffer Underflow, BUFFER_PADDING not enough"); + } + + nextPtr->nextRemoved -= srcLen; + memcpy(RemovePoint(nextPtr), src, (size_t) srcLen); + RecycleBuffer(statePtr, bufPtr, 0); + statePtr->inQueueHead = nextPtr; + return ReadChars(statePtr, objPtr, charsToRead, + offsetPtr, factorPtr); } statePtr->inputEncodingFlags &= ~TCL_ENCODING_START; @@ -5481,7 +5634,6 @@ ReadChars( *factorPtr = dstWrote * UTF_EXPANSION_FACTOR / srcRead; } *offsetPtr += dstWrote; -//fprintf(stdout, "OK: %d\n", numChars); fflush(stdout); return numChars; } -- cgit v0.12 From 7b66d219bab6b6710a22b4b18ca563239ffdc050 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 28 Feb 2014 18:28:16 +0000 Subject: tidy up. --- generic/tclIO.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index ec71991..a0a349f 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5928,9 +5928,8 @@ TranslateInputEOL( if (*src == '\r') { src++; if (src >= srcMax) { -// SetFlag(statePtr, INPUT_NEED_NL); -//fprintf(stdout, "BREAK!\n"); fflush(stdout); -src--; break; + src--; + break; } else if (*src == '\n') { *dst++ = *src++; } else { @@ -5942,7 +5941,6 @@ src--; break; } srcLen = src - srcStart; dstLen = dst - dstStart; -//fprintf(stdout, "eh? %d %d\n", srcLen, dstLen); fflush(stdout); break; } case TCL_TRANSLATE_AUTO: { -- cgit v0.12 From bb1b4fcb06f80fddfd136a9bd14bf64808f45971 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 28 Feb 2014 18:36:00 +0000 Subject: another coverage test. --- tests/io.test | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/io.test b/tests/io.test index 0941e02..c325809 100644 --- a/tests/io.test +++ b/tests/io.test @@ -4772,6 +4772,21 @@ test io-35.19 {Tcl_Eof, eof char in middle, cr write, crlf read} { close $f list $c $l $e [scan [string index $in end] %c] } {17 8 1 13} +test io-35.20 {Tcl_Eof, eof char in middle, cr write, crlf read} { + file delete $path(test1) + set f [open $path(test1) w] + fconfigure $f -translation cr -eofchar {} + set i [format \n%cqrsuvw 26] + puts $f $i + close $f + set c [file size $path(test1)] + set f [open $path(test1) r] + fconfigure $f -translation crlf -eofchar \x1a + set l [string length [set in [read $f]]] + set e [eof $f] + close $f + list $c $l $e [scan [string index $in end] %c] +} {9 1 1 13} # Test Tcl_InputBlocked -- cgit v0.12 From 6ac36ed52bd548be97ae7baa3022e822f6a1bdce Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 1 Mar 2014 03:01:41 +0000 Subject: Fixups make the test suite almost pass (except *io-39.17) --- generic/tclIO.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 7b798af..139a05e 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5289,7 +5289,7 @@ ReadChars( dst = TclGetString(objPtr) + numBytes; } -#if 0 +#if 1 /* * This routine is burdened with satisfying several constraints. @@ -5373,6 +5373,7 @@ ReadChars( * a proper srcRead value. At this point, there * are no chars before the eof char in the buffer. */ + Tcl_SetObjLength(objPtr, numBytes); return -1; } @@ -5459,7 +5460,6 @@ ReadChars( statePtr->inputEncodingFlags &= ~TCL_ENCODING_START; Tcl_SetObjLength(objPtr, numBytes + 1); - // *offsetPtr += 1; return 1; } @@ -5473,7 +5473,6 @@ ReadChars( dst[0] = '\r'; bufPtr->nextRemoved = bufPtr->nextAdded; Tcl_SetObjLength(objPtr, numBytes + 1); - //*offsetPtr += 1; return 1; } @@ -5518,6 +5517,7 @@ ReadChars( if (srcLen > 0) { SetFlag(statePtr, CHANNEL_NEED_MORE_DATA); } + Tcl_SetObjLength(objPtr, numBytes); return -1; } @@ -5542,6 +5542,7 @@ ReadChars( memcpy(RemovePoint(nextPtr), src, (size_t) srcLen); RecycleBuffer(statePtr, bufPtr, 0); statePtr->inQueueHead = nextPtr; + Tcl_SetObjLength(objPtr, numBytes); return ReadChars(statePtr, objPtr, charsToRead, factorPtr); } @@ -5552,7 +5553,6 @@ ReadChars( *factorPtr = dstWrote * UTF_EXPANSION_FACTOR / srcRead; } Tcl_SetObjLength(objPtr, numBytes + dstWrote); - //*offsetPtr += dstWrote; return numChars; } @@ -5850,9 +5850,8 @@ TranslateInputEOL( if (*src == '\r') { src++; if (src >= srcMax) { -SetFlag(statePtr, INPUT_NEED_NL); -// src--; -// break; + src--; + break; } else if (*src == '\n') { *dst++ = *src++; } else { -- cgit v0.12 From 23801213cacd306b0bfddbdb51efcd15c88ed0f9 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 5 Mar 2014 14:23:38 +0000 Subject: Merge repair to correct failing tests. --- generic/tclIO.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 139a05e..6e3f0cf 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5320,7 +5320,7 @@ ReadChars( * srcLen bytes, write no more than dstLimit bytes. */ - int code = Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, + int code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen, statePtr->inputEncodingFlags & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState, dst, dstLimit, &srcRead, &dstDecoded, &numChars); @@ -5441,7 +5441,7 @@ ReadChars( statePtr->inputEncodingFlags = savedIEFlags; statePtr->inputEncodingState = savedState; - Tcl_ExternalToUtf(NULL, statePtr->encoding, src, srcLen, + Tcl_ExternalToUtf(NULL, encoding, src, srcLen, statePtr->inputEncodingFlags & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState, buffer, TCL_UTF_MAX + 2, &read, &decoded, &count); -- cgit v0.12 From 9d31d410437d7e7fad1201c869e0a7c479daf693 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 5 Mar 2014 17:16:35 +0000 Subject: Adapt CopyAndTranslateBuffer() to changes in TranslateInputEOL(). Notably no longer using the INPUT_NEED_NL flag. --- generic/tclIO.c | 117 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 40 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 6e3f0cf..013f8dd 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5914,7 +5914,7 @@ TranslateInputEOL( SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; - ResetFlag(statePtr, INPUT_SAW_CR | INPUT_NEED_NL); + ResetFlag(statePtr, INPUT_SAW_CR); return 1; } @@ -9046,7 +9046,6 @@ CopyAndTranslateBuffer( * in the current input buffer? */ int copied; /* How many characters were already copied * into the destination space? */ - int toCopy; /* * If there is no input at all, return zero. The invariant is that either @@ -9061,51 +9060,90 @@ CopyAndTranslateBuffer( bufPtr = statePtr->inQueueHead; bytesInBuffer = BytesLeft(bufPtr); - copied = 0; -#if 0 - if (statePtr->flags & INPUT_NEED_NL) { +#if 1 + copied = space; + if (bytesInBuffer <= copied) { + copied = bytesInBuffer; + } + if (copied == 0) { + return copied; + } + TranslateInputEOL(statePtr, result, RemovePoint(bufPtr), + &copied, &bytesInBuffer); + bufPtr->nextRemoved += bytesInBuffer; - /* - * An earlier call to TranslateInputEOL ended in the read of a \r . - * Only the next read from the same channel can complete the - * translation sequence to tell us what character we should read. - */ + /* + * If the current buffer is empty recycle it. + */ + + if (IsBufferEmpty(bufPtr)) { + statePtr->inQueueHead = bufPtr->nextPtr; + if (statePtr->inQueueHead == NULL) { + statePtr->inQueueTail = NULL; + } + RecycleBuffer(statePtr, bufPtr, 0); + } else { + + if (copied > 0) { + return copied; + } - if (bytesInBuffer) { - /* There's a next byte. It will settle things. */ - ResetFlag(statePtr, INPUT_NEED_NL); + if (statePtr->inEofChar + && RemovePoint(bufPtr)[0] == statePtr->inEofChar) { + return 0; + } - if (RemovePoint(bufPtr)[0] == '\n') { - bufPtr->nextRemoved++; - bytesInBuffer--; - *result++ = '\n'; - } else { - *result++ = '\r'; + if (BytesLeft(bufPtr) == 1) { + + ChannelBuffer *nextPtr = bufPtr->nextPtr; + + if (nextPtr == NULL) { + + if (statePtr->flags & CHANNEL_EOF) { + *result = '\r'; + bufPtr->nextRemoved += 1; + return 1; + } + + SetFlag(statePtr, CHANNEL_NEED_MORE_DATA); + return 0; } - copied++; - space--; - } else if (statePtr->flags & CHANNEL_EOF) { - /* There is no next byte, and there never will be (EOF). */ - ResetFlag(statePtr, INPUT_NEED_NL); + + nextPtr->nextRemoved -= 1; + memcpy(RemovePoint(nextPtr), RemovePoint(bufPtr), 1); + RecycleBuffer(statePtr, bufPtr, 0); + statePtr->inQueueHead = nextPtr; + return 0; + } + + if (statePtr->inEofChar + && RemovePoint(bufPtr)[1] == statePtr->inEofChar) { *result = '\r'; + bufPtr->nextRemoved += 1; return 1; - } else { - /* There is no next byte. Ask the caller to read more. */ - return 0; } + /* + * Buffer is not empty. How can that be? + * 0) We stopped early due to the value of "space". + * => copied > 0 and all is fine. + * 1) We saw eof char and stopped the translation copy. + * => if (copied > 0) or ((copied == 0) and @ eof char), + * return is fine. + * 2) The buffer holds a \r while in CRLF translation, followed + * by either the end of the buffer, or the eof char. + */ + } - toCopy = space; - if (bytesInBuffer <= toCopy) { - toCopy = bytesInBuffer; - } - if (toCopy == 0) { - return copied; - } - TranslateInputEOL(statePtr, result, RemovePoint(bufPtr), - &toCopy, &bytesInBuffer); - bufPtr->nextRemoved += bytesInBuffer; - copied += toCopy; + + /* + * Return the number of characters copied into the result buffer. This may + * be different from the number of bytes consumed, because of EOL + * translations. + */ + + return copied; #else + copied = 0; switch (statePtr->inputTranslation) { case TCL_TRANSLATE_LF: if (bytesInBuffer == 0) { @@ -9265,7 +9303,6 @@ CopyAndTranslateBuffer( } } } -#endif /* * If the current buffer is empty recycle it. @@ -9286,6 +9323,7 @@ CopyAndTranslateBuffer( */ return copied; +#endif } /* @@ -10726,7 +10764,6 @@ DumpFlags( ChanFlag('S', CHANNEL_STICKY_EOF); ChanFlag('B', CHANNEL_BLOCKED); ChanFlag('/', INPUT_SAW_CR); - ChanFlag('*', INPUT_NEED_NL); ChanFlag('D', CHANNEL_DEAD); ChanFlag('R', CHANNEL_RAW_MODE); #ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING -- cgit v0.12 From 94a4d6ac65eed79a3fe89a71d1c2a429793300bc Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 5 Mar 2014 19:41:51 +0000 Subject: Remove old dead code; silence compiler warnings; tidy up. --- generic/tclIO.c | 415 ++------------------------------------------------------ generic/tclIO.h | 3 - 2 files changed, 12 insertions(+), 406 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 013f8dd..821d111 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5252,33 +5252,25 @@ ReadChars( * UTF-8. On output, contains another guess * based on the data seen so far. */ { - int toRead, factor, srcLen, dstNeeded, numBytes; - int srcRead, dstWrote, numChars, dstRead; - ChannelBuffer *bufPtr; - char *src, *dst; - Tcl_EncodingState oldState; - int encEndFlagSuppressed = 0; Tcl_Encoding encoding = statePtr->encoding? statePtr->encoding : GetBinaryEncoding(); - - factor = *factorPtr; - - bufPtr = statePtr->inQueueHead; - src = RemovePoint(bufPtr); - srcLen = BytesLeft(bufPtr); - - toRead = charsToRead; - if ((unsigned)toRead > (unsigned)srcLen) { - toRead = srcLen; - } + Tcl_EncodingState savedState = statePtr->inputEncodingState; + ChannelBuffer *bufPtr = statePtr->inQueueHead; + int savedIEFlags = statePtr->inputEncodingFlags; + int savedFlags = statePtr->flags; + char *dst, *src = RemovePoint(bufPtr); + int dstLimit, numBytes, srcLen = BytesLeft(bufPtr); + int toRead = ((unsigned) charsToRead > srcLen) ? srcLen : charsToRead; /* * 'factor' is how much we guess that the bytes in the source buffer will * expand when converted to UTF-8 chars. This guess comes from analyzing * how many characters were produced by the previous pass. */ + + int factor = *factorPtr; + int dstNeeded = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR; - dstNeeded = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR; (void) TclGetStringFromObj(objPtr, &numBytes); Tcl_AppendToObj(objPtr, NULL, dstNeeded); if (toRead == srcLen) { @@ -5289,8 +5281,6 @@ ReadChars( dst = TclGetString(objPtr) + numBytes; } -#if 1 - /* * This routine is burdened with satisfying several constraints. * It cannot append more than 'charsToRead` chars onto objPtr. @@ -5307,13 +5297,9 @@ ReadChars( * a consistent set of results. This takes the shape of a loop. */ - int dstLimit = dstNeeded + 1; - int savedFlags = statePtr->flags; - int savedIEFlags = statePtr->inputEncodingFlags; - Tcl_EncodingState savedState = statePtr->inputEncodingState; - + dstLimit = dstNeeded + 1; while (1) { - int dstDecoded; + int dstDecoded, dstRead, dstWrote, srcRead, numChars; /* * Perform the encoding transformation. Read no more than @@ -5555,200 +5541,6 @@ ReadChars( Tcl_SetObjLength(objPtr, numBytes + dstWrote); return numChars; } - -#else - /* - * [Bug 1462248]: The cause of the crash reported in this bug is this: - * - * - ReadChars, called with a single buffer, with a incomplete - * multi-byte character at the end (only the first byte of it). - * - Encoding translation fails, asks for more data - * - Data is read, and eof is reached, TCL_ENCODING_END (TEE) is set. - * - ReadChar is called again, converts the first buffer, but due to TEE - * it does not check for incomplete multi-byte data, and the character - * just after the end of the first buffer is a valid completion of the - * multi-byte header in the actual buffer. The conversion reads more - * characters from the buffer then present. This causes nextRemoved to - * overshoot nextAdded and the next reads compute a negative srcLen, - * cause further translations to fail, causing copying of data into the - * next buffer using bad arguments, causing the mecpy for to eventually - * fail. - * - * In the end it is a memory access bug spiraling out of control if the - * conditions are _just so_. And ultimate cause is that TEE is given to a - * conversion where it should not. TEE signals that this is the last - * buffer. Except in our case it is not. - * - * My solution is to suppress TEE if the first buffer is not the last. We - * will eventually need it given that EOF has been reached, but not right - * now. This is what the new flag "endEncSuppressFlag" is for. - * - * The bug in 'Tcl_Utf2UtfProc' where it read from memory behind the - * actual buffer has been fixed as well, and fixes the problem with the - * crash too, but this would still allow the generic layer to - * accidentially break a multi-byte sequence if the conditions are just - * right, because again the ExternalToUtf would be successful where it - * should not. - */ - - if ((statePtr->inputEncodingFlags & TCL_ENCODING_END) && - (bufPtr->nextPtr != NULL)) { - /* - * TEE is set for a buffer which is not the last. Squash it for now, - * and restore it later, before yielding control to our caller. - */ - - statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; - encEndFlagSuppressed = 1; - } - - oldState = statePtr->inputEncodingState; - if (statePtr->flags & INPUT_NEED_NL) { - /* - * We want a '\n' because the last character we saw was '\r'. - */ - - ResetFlag(statePtr, INPUT_NEED_NL); - Tcl_ExternalToUtf(NULL, encoding, src, srcLen, - statePtr->inputEncodingFlags, &statePtr->inputEncodingState, - dst, TCL_UTF_MAX + 1, &srcRead, &dstWrote, &numChars); - if ((dstWrote > 0) && (*dst == '\n')) { - /* - * The next char was a '\n'. Consume it and produce a '\n'. - */ - - bufPtr->nextRemoved += srcRead; - } else { - /* - * The next char was not a '\n'. Produce a '\r'. - */ - - *dst = '\r'; - } - statePtr->inputEncodingFlags &= ~TCL_ENCODING_START; - Tcl_SetObjLength(objPtr, numBytes + 1); - - if (encEndFlagSuppressed) { - statePtr->inputEncodingFlags |= TCL_ENCODING_END; - } - return 1; - } - - Tcl_ExternalToUtf(NULL, encoding, src, srcLen, - statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst, - dstNeeded + 1, &srcRead, &dstWrote, &numChars); - - if (encEndFlagSuppressed) { - statePtr->inputEncodingFlags |= TCL_ENCODING_END; - } - - if (srcRead == 0) { - /* - * Not enough bytes in src buffer to make a complete char. Copy the - * bytes to the next buffer to make a new contiguous string, then tell - * the caller to fill the buffer with more bytes. - */ - - ChannelBuffer *nextPtr; - - nextPtr = bufPtr->nextPtr; - if (nextPtr == NULL) { - if (srcLen > 0) { - /* - * There isn't enough data in the buffers to complete the next - * character, so we need to wait for more data before the next - * file event can be delivered. [Bug 478856] - * - * The exception to this is if the input buffer was completely - * empty before we tried to convert its contents. Nothing in, - * nothing out, and no incomplete character data. The - * conversion before the current one was complete. - */ - - SetFlag(statePtr, CHANNEL_NEED_MORE_DATA); - } - Tcl_SetObjLength(objPtr, numBytes); - return -1; - } - - /* - * Space is made at the beginning of the buffer to copy the previous - * unused bytes there. Check first if the buffer we are using actually - * has enough space at its beginning for the data we are copying. - * Because if not we will write over the buffer management - * information, especially the 'nextPtr'. - * - * Note that the BUFFER_PADDING (See AllocChannelBuffer) is used to - * prevent exactly this situation. I.e. it should never happen. - * Therefore it is ok to panic should it happen despite the - * precautions. - */ - - if (nextPtr->nextRemoved - srcLen < 0) { - Tcl_Panic("Buffer Underflow, BUFFER_PADDING not enough"); - } - - nextPtr->nextRemoved -= srcLen; - memcpy(RemovePoint(nextPtr), src, (size_t) srcLen); - RecycleBuffer(statePtr, bufPtr, 0); - statePtr->inQueueHead = nextPtr; - Tcl_SetObjLength(objPtr, numBytes); - return ReadChars(statePtr, objPtr, charsToRead, factorPtr); - } - - dstRead = dstWrote; - if (TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead) != 0) { - /* - * Hit EOF char. How many bytes of src correspond to where the EOF was - * located in dst? Run the conversion again with an output buffer just - * big enough to hold the data so we can get the correct value for - * srcRead. - */ - - if (dstWrote == 0) { - Tcl_SetObjLength(objPtr, numBytes); - return -1; - } - statePtr->inputEncodingState = oldState; - Tcl_ExternalToUtf(NULL, encoding, src, srcLen, - statePtr->inputEncodingFlags, &statePtr->inputEncodingState, - dst, dstRead + TCL_UTF_MAX, &srcRead, &dstWrote, &numChars); - TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead); - } - - /* - * The number of characters that we got may be less than the number that - * we started with because "\r\n" sequences may have been turned into just - * '\n' in dst. - */ - - numChars -= (dstRead - dstWrote); - - if ((unsigned) numChars > (unsigned) toRead) { - /* - * Got too many chars. - */ - - const char *eof; - - eof = Tcl_UtfAtIndex(dst, toRead); - statePtr->inputEncodingState = oldState; - Tcl_ExternalToUtf(NULL, encoding, src, srcLen, - statePtr->inputEncodingFlags, &statePtr->inputEncodingState, - dst, eof - dst + TCL_UTF_MAX, &srcRead, &dstWrote, &numChars); - dstRead = dstWrote; - TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead); - numChars -= (dstRead - dstWrote); - } - statePtr->inputEncodingFlags &= ~TCL_ENCODING_START; - - bufPtr->nextRemoved += srcRead; - if (dstWrote > srcRead + 1) { - *factorPtr = dstWrote * UTF_EXPANSION_FACTOR / srcRead; - } - Tcl_SetObjLength(objPtr, numBytes + dstWrote); - return numChars; -#endif } /* @@ -9060,7 +8852,6 @@ CopyAndTranslateBuffer( bufPtr = statePtr->inQueueHead; bytesInBuffer = BytesLeft(bufPtr); -#if 1 copied = space; if (bytesInBuffer <= copied) { copied = bytesInBuffer; @@ -9142,188 +8933,6 @@ CopyAndTranslateBuffer( */ return copied; -#else - copied = 0; - switch (statePtr->inputTranslation) { - case TCL_TRANSLATE_LF: - if (bytesInBuffer == 0) { - return 0; - } - - /* - * Copy the current chunk into the result buffer. - */ - - if (bytesInBuffer < space) { - space = bytesInBuffer; - } - memcpy(result, RemovePoint(bufPtr), (size_t) space); - bufPtr->nextRemoved += space; - copied = space; - break; - case TCL_TRANSLATE_CR: { - char *end; - - if (bytesInBuffer == 0) { - return 0; - } - - /* - * Copy the current chunk into the result buffer, then replace all \r - * with \n. - */ - - if (bytesInBuffer < space) { - space = bytesInBuffer; - } - memcpy(result, RemovePoint(bufPtr), (size_t) space); - bufPtr->nextRemoved += space; - copied = space; - - for (end = result + copied; result < end; result++) { - if (*result == '\r') { - *result = '\n'; - } - } - break; - } - case TCL_TRANSLATE_CRLF: { - char *src, *end, *dst; - int curByte; - - /* - * If there is a held-back "\r" at EOF, produce it now. - */ - - if (bytesInBuffer == 0) { - if ((statePtr->flags & (INPUT_SAW_CR | CHANNEL_EOF)) == - (INPUT_SAW_CR | CHANNEL_EOF)) { - result[0] = '\r'; - ResetFlag(statePtr, INPUT_SAW_CR); - return 1; - } - return 0; - } - - /* - * Copy the current chunk and replace "\r\n" with "\n" (but not - * standalone "\r"!). - */ - - if (bytesInBuffer < space) { - space = bytesInBuffer; - } - memcpy(result, RemovePoint(bufPtr), (size_t) space); - bufPtr->nextRemoved += space; - copied = space; - - end = result + copied; - dst = result; - for (src = result; src < end; src++) { - curByte = *src; - if (curByte == '\n') { - ResetFlag(statePtr, INPUT_SAW_CR); - } else if (statePtr->flags & INPUT_SAW_CR) { - ResetFlag(statePtr, INPUT_SAW_CR); - *dst = '\r'; - dst++; - } - if (curByte == '\r') { - SetFlag(statePtr, INPUT_SAW_CR); - } else { - *dst = (char) curByte; - dst++; - } - } - copied = dst - result; - break; - } - case TCL_TRANSLATE_AUTO: { - char *src, *end, *dst; - int curByte; - - if (bytesInBuffer == 0) { - return 0; - } - - /* - * Loop over the current buffer, converting "\r" and "\r\n" to "\n". - */ - - if (bytesInBuffer < space) { - space = bytesInBuffer; - } - memcpy(result, RemovePoint(bufPtr), (size_t) space); - bufPtr->nextRemoved += space; - copied = space; - - end = result + copied; - dst = result; - for (src = result; src < end; src++) { - curByte = *src; - if (curByte == '\r') { - SetFlag(statePtr, INPUT_SAW_CR); - *dst = '\n'; - dst++; - } else { - if ((curByte != '\n') || !(statePtr->flags & INPUT_SAW_CR)) { - *dst = (char) curByte; - dst++; - } - ResetFlag(statePtr, INPUT_SAW_CR); - } - } - copied = dst - result; - break; - } - default: - Tcl_Panic("unknown eol translation mode"); - } - - /* - * If an in-stream EOF character is set for this channel, check that the - * input we copied so far does not contain the EOF char. If it does, copy - * only up to and excluding that character. - */ - - if (statePtr->inEofChar != 0) { - int i; - - for (i = 0; i < copied; i++) { - if (result[i] == (char) statePtr->inEofChar) { - /* - * Set sticky EOF so that no further input is presented to the - * caller. - */ - - SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); - statePtr->inputEncodingFlags |= TCL_ENCODING_END; - copied = i; - break; - } - } - } - - /* - * If the current buffer is empty recycle it. - */ - - if (IsBufferEmpty(bufPtr)) { - statePtr->inQueueHead = bufPtr->nextPtr; - if (statePtr->inQueueHead == NULL) { - statePtr->inQueueTail = NULL; - } - RecycleBuffer(statePtr, bufPtr, 0); - } - - /* - * Return the number of characters copied into the result buffer. This may - * be different from the number of bytes consumed, because of EOL - * translations. - */ - - return copied; -#endif } /* diff --git a/generic/tclIO.h b/generic/tclIO.h index ebf2ef7..a57d4c5 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -252,9 +252,6 @@ typedef struct ChannelState { #define INPUT_SAW_CR (1<<12) /* Channel is in CRLF eol input * translation mode and the last byte * seen was a "\r". */ -#define INPUT_NEED_NL (1<<15) /* Saw a '\r' at end of last buffer, - * and there should be a '\n' at - * beginning of next buffer. */ #define CHANNEL_DEAD (1<<13) /* The channel has been closed by the * exit handler (on exit) but not * deallocated. When any IO operation -- cgit v0.12 From a59f5c70234be1134e3752519daa601c3c850365 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 6 Mar 2014 15:51:06 +0000 Subject: Variable "rawStart" serves no purpose. --- generic/tclIO.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 821d111..b0d0e32 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4464,7 +4464,7 @@ FilterInputBytes( ChannelState *statePtr = chanPtr->state; /* State info for channel */ ChannelBuffer *bufPtr; - char *raw, *rawStart, *dst; + char *raw, *dst; int offset, toRead, dstNeeded, spaceLeft, result, rawLen; Tcl_Obj *objPtr; #define ENCODING_LINESIZE 20 /* Lower bound on how many bytes to convert at @@ -4521,8 +4521,7 @@ FilterInputBytes( * string rep if we need more space. */ - rawStart = RemovePoint(bufPtr); - raw = rawStart; + raw = RemovePoint(bufPtr); rawLen = BytesLeft(bufPtr); dst = *gsPtr->dstPtr; -- cgit v0.12 From a895183137cb5e741f92353116465a9e27c432e4 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 7 Mar 2014 19:43:16 +0000 Subject: Simplify the input eof char scan. Update some comments. --- generic/tclIO.c | 64 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index b0d0e32..03aac32 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5244,7 +5244,11 @@ ReadChars( * is larger than the number of characters * available in the first buffer, only the * characters from the first buffer are - * returned. */ + * returned. The execption is when there is + * not any complete character in the first + * buffer. In that case, a recursive call + * effectively obtains chars from the + * second buffer. */ int *factorPtr) /* On input, contains a guess of how many * bytes need to be allocated to hold the * result of converting N source bytes to @@ -5259,6 +5263,15 @@ ReadChars( int savedFlags = statePtr->flags; char *dst, *src = RemovePoint(bufPtr); int dstLimit, numBytes, srcLen = BytesLeft(bufPtr); + + /* + * One src byte can yield at most one character. So when the + * number of src bytes we plan to read is less than the limit on + * character count to be read, clearly we will remain within that + * limit, and we can use the value of "srcLen" as a tighter limit + * for sizing receiving buffers. + */ + int toRead = ((unsigned) charsToRead > srcLen) ? srcLen : charsToRead; /* @@ -5569,43 +5582,32 @@ TranslateInputEOL( * characters. */ const char *srcStart, /* Source characters. */ int *dstLenPtr, /* On entry, the maximum length of output - * buffer in bytes; must be <= *srcLenPtr. On - * exit, the number of bytes actually used in - * output buffer. */ + * buffer in bytes. On exit, the number of + * bytes actually used in output buffer. */ int *srcLenPtr) /* On entry, the length of source buffer. On * exit, the number of bytes read from the * source buffer. */ { - int dstLen, srcLen, inEofChar; - const char *eof; + const char *eof = NULL; + int dstLen = *dstLenPtr; + int srcLen = *srcLenPtr; + int inEofChar = statePtr->inEofChar; - dstLen = *dstLenPtr; - - eof = NULL; - inEofChar = statePtr->inEofChar; if (inEofChar != '\0') { /* - * Find EOF in translated buffer then compress out the EOL. The source - * buffer may be much longer than the destination buffer - we only - * want to return EOF if the EOF has been copied to the destination - * buffer. + * Make sure we do not read past any logical end of channel input + * created by the presence of the input eof char. */ - const char *src, *srcMax; - - srcMax = srcStart + *srcLenPtr; - for (src = srcStart; src < srcMax; src++) { - if (*src == inEofChar) { - eof = src; - srcLen = src - srcStart; - if (srcLen < dstLen) { - dstLen = srcLen; - } - *srcLenPtr = srcLen; - break; - } + if ((eof = memchr(srcStart, inEofChar, srcLen))) { + srcLen = eof - srcStart; } } + + if (dstLen > srcLen) { + dstLen = srcLen; + } + switch (statePtr->inputTranslation) { case TCL_TRANSLATE_LF: if (dstStart != srcStart) { @@ -5635,7 +5637,7 @@ TranslateInputEOL( dst = dstStart; src = srcStart; srcEnd = srcStart + dstLen; - srcMax = srcStart + *srcLenPtr; + srcMax = srcStart + srcLen; for ( ; src < srcEnd; ) { if (*src == '\r') { @@ -5663,7 +5665,7 @@ TranslateInputEOL( dst = dstStart; src = srcStart; srcEnd = srcStart + dstLen; - srcMax = srcStart + *srcLenPtr; + srcMax = srcStart + srcLen; if ((statePtr->flags & INPUT_SAW_CR) && (src < srcMax)) { if (*src == '\n') { @@ -5692,9 +5694,10 @@ TranslateInputEOL( break; } default: - return 0; + Tcl_Panic("unknown input translation %d", statePtr->inputTranslation); } *dstLenPtr = dstLen; + *srcLenPtr = srcLen; if ((eof != NULL) && (srcStart + srcLen >= eof)) { /* @@ -5709,7 +5712,6 @@ TranslateInputEOL( return 1; } - *srcLenPtr = srcLen; return 0; } -- cgit v0.12 From 83f5493faa96da87b5327be1f49e432f5a870879 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 7 Mar 2014 20:15:12 +0000 Subject: TranslateInputEOL() callers no longer need assert dstLen <= srcLen. --- generic/tclIO.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 03aac32..d23ca03 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5330,7 +5330,8 @@ ReadChars( * Capture the number of bytes actually consumed in dstRead. */ - dstWrote = dstRead = dstDecoded; + dstWrote = dstLimit; + dstRead = dstDecoded; TranslateInputEOL(statePtr, dst, dst, &dstWrote, &dstRead); if (dstRead < dstDecoded) { @@ -8852,14 +8853,11 @@ CopyAndTranslateBuffer( } bufPtr = statePtr->inQueueHead; bytesInBuffer = BytesLeft(bufPtr); + if (bytesInBuffer == 0) { + return 0; + } copied = space; - if (bytesInBuffer <= copied) { - copied = bytesInBuffer; - } - if (copied == 0) { - return copied; - } TranslateInputEOL(statePtr, result, RemovePoint(bufPtr), &copied, &bytesInBuffer); bufPtr->nextRemoved += bytesInBuffer; -- cgit v0.12 From 62268820f73d797eebfc2a66ed3fa856c27daeb7 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 10 Mar 2014 03:09:03 +0000 Subject: TranslateInputEOL doesn't need to return anything. No caller cares. Other optimizations and simplifications. --- generic/tclIO.c | 79 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index d23ca03..6dfdd03 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -214,7 +214,7 @@ static int StackSetBlockMode(Channel *chanPtr, int mode); static int SetBlockMode(Tcl_Interp *interp, Channel *chanPtr, int mode); static void StopCopy(CopyState *csPtr); -static int TranslateInputEOL(ChannelState *statePtr, char *dst, +static void TranslateInputEOL(ChannelState *statePtr, char *dst, const char *src, int *dstLenPtr, int *srcLenPtr); static void UpdateInterest(Channel *chanPtr); static int Write(Channel *chanPtr, const char *src, @@ -5574,7 +5574,7 @@ ReadChars( *--------------------------------------------------------------------------- */ -static int +static void TranslateInputEOL( ChannelState *statePtr, /* Channel being read, for EOL translation and * EOF character. */ @@ -5594,6 +5594,29 @@ TranslateInputEOL( int srcLen = *srcLenPtr; int inEofChar = statePtr->inEofChar; + /* + * Depending on the translation mode in use, there's no need + * to scan more srcLen bytes at srcStart than can possibly transform + * to dstLen bytes. This keeps the scan for eof char below from + * being pointlessly long. + */ + + switch (statePtr->inputTranslation) { + case TCL_TRANSLATE_LF: + case TCL_TRANSLATE_CR: + if (srcLen > dstLen) { + /* In these modes, each src byte become a dst byte. */ + srcLen = dstLen; + } + break; + default: + /* In other modes, at most 2 src bytes become a dst byte. */ + if (srcLen > 2 * dstLen) { + srcLen = 2 * dstLen; + } + break; + } + if (inEofChar != '\0') { /* * Make sure we do not read past any logical end of channel input @@ -5605,36 +5628,29 @@ TranslateInputEOL( } } - if (dstLen > srcLen) { - dstLen = srcLen; - } - switch (statePtr->inputTranslation) { case TCL_TRANSLATE_LF: + case TCL_TRANSLATE_CR: if (dstStart != srcStart) { - memcpy(dstStart, srcStart, (size_t) dstLen); + memcpy(dstStart, srcStart, (size_t) srcLen); } - srcLen = dstLen; - break; - case TCL_TRANSLATE_CR: { - char *dst, *dstEnd; + if (statePtr->inputTranslation == TCL_TRANSLATE_CR) { + char *dst = dstStart; + char *dstEnd = dstStart + srcLen; - if (dstStart != srcStart) { - memcpy(dstStart, srcStart, (size_t) dstLen); - } - dstEnd = dstStart + dstLen; - for (dst = dstStart; dst < dstEnd; dst++) { - if (*dst == '\r') { - *dst = '\n'; + while ((dst = memchr(dst, '\r', dstEnd - dst))) { + *dst++ = '\n'; } } - srcLen = dstLen; + dstLen = srcLen; break; - } case TCL_TRANSLATE_CRLF: { char *dst; const char *src, *srcEnd, *srcMax; + if (dstLen > srcLen) { + dstLen = srcLen; + } dst = dstStart; src = srcStart; srcEnd = srcStart + dstLen; @@ -5660,29 +5676,23 @@ TranslateInputEOL( break; } case TCL_TRANSLATE_AUTO: { - char *dst; - const char *src, *srcEnd, *srcMax; + const char *srcEnd = srcStart + srcLen; + const char *dstEnd = dstStart + dstLen; + const char *src = srcStart; + char *dst = dstStart; - dst = dstStart; - src = srcStart; - srcEnd = srcStart + dstLen; - srcMax = srcStart + srcLen; - - if ((statePtr->flags & INPUT_SAW_CR) && (src < srcMax)) { + if ((statePtr->flags & INPUT_SAW_CR) && srcLen) { if (*src == '\n') { src++; } ResetFlag(statePtr, INPUT_SAW_CR); } - for ( ; src < srcEnd; ) { + for ( ; dst < dstEnd && src < srcEnd; ) { if (*src == '\r') { src++; - if (src >= srcMax) { + if (src == srcEnd) { SetFlag(statePtr, INPUT_SAW_CR); } else if (*src == '\n') { - if (srcEnd < srcMax) { - srcEnd++; - } src++; } *dst++ = '\n'; @@ -5710,10 +5720,7 @@ TranslateInputEOL( SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; ResetFlag(statePtr, INPUT_SAW_CR); - return 1; } - - return 0; } /* -- cgit v0.12 From 091096d315755aa89f28bd063b426e16a4c16e51 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 10 Mar 2014 17:58:32 +0000 Subject: Bring CRLF translation in parallel with others. --- generic/tclIO.c | 20 +++++++------------- tests/io.test | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 6dfdd03..2971838 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5645,21 +5645,15 @@ TranslateInputEOL( dstLen = srcLen; break; case TCL_TRANSLATE_CRLF: { - char *dst; - const char *src, *srcEnd, *srcMax; - - if (dstLen > srcLen) { - dstLen = srcLen; - } - dst = dstStart; - src = srcStart; - srcEnd = srcStart + dstLen; - srcMax = srcStart + srcLen; + const char *srcEnd = srcStart + srcLen; + const char *dstEnd = dstStart + dstLen; + const char *src = srcStart; + char *dst = dstStart; - for ( ; src < srcEnd; ) { + for ( ; dst < dstEnd && src < srcEnd; ) { if (*src == '\r') { src++; - if (src >= srcMax) { + if (src == srcEnd) { src--; break; } else if (*src == '\n') { @@ -5710,7 +5704,7 @@ TranslateInputEOL( *dstLenPtr = dstLen; *srcLenPtr = srcLen; - if ((eof != NULL) && (srcStart + srcLen >= eof)) { + if (srcStart + srcLen == eof) { /* * EOF character was seen in EOL translated range. Leave current file * position pointing at the EOF character, but don't store the EOF diff --git a/tests/io.test b/tests/io.test index c325809..e3fff32 100644 --- a/tests/io.test +++ b/tests/io.test @@ -6858,6 +6858,20 @@ test io-52.14 {coverage of -translation crlf} { close $out file size $path(test2) } 29 +test io-52.14.1 {coverage of -translation crlf} { + file delete $path(test1) $path(test2) + set out [open $path(test1) wb] + chan configure $out -translation lf + puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz + close $out + set in [open $path(test1)] + chan configure $in -buffersize 8 -translation crlf + set out [open $path(test2) w] + fcopy $in $out -size 2 + close $in + close $out + file size $path(test2) +} 2 test io-52.15 {coverage of -translation crlf} { file delete $path(test1) $path(test2) set out [open $path(test1) wb] -- cgit v0.12 From dd5ac1c6419faed6fedef71a19409cb52335353c Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 10 Mar 2014 19:00:19 +0000 Subject: Rewrite CRLF translation to use more system calls. --- generic/tclIO.c | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 2971838..1070f0a 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5645,28 +5645,38 @@ TranslateInputEOL( dstLen = srcLen; break; case TCL_TRANSLATE_CRLF: { - const char *srcEnd = srcStart + srcLen; - const char *dstEnd = dstStart + dstLen; - const char *src = srcStart; + const char *crFound, *src = srcStart; char *dst = dstStart; - - for ( ; dst < dstEnd && src < srcEnd; ) { - if (*src == '\r') { - src++; - if (src == srcEnd) { - src--; - break; - } else if (*src == '\n') { - *dst++ = *src++; - } else { - *dst++ = '\r'; - } + int lesser = (dstLen < srcLen) ? dstLen : srcLen; + + while ((crFound = memchr(src, '\r', lesser))) { + int numBytes = crFound - src; + memmove(dst, src, numBytes); + + dst += numBytes; + src += numBytes; + dstLen -= numBytes; + srcLen -= numBytes; + if (srcLen == 1) { + /* valid src bytes end in \r */ + lesser = 0; + break; + } + if (src[1] == '\n') { + *dst++ = '\n'; + srcLen -= 2; + src += 2; } else { - *dst++ = *src++; + *dst++ = '\r'; + srcLen--; + src++; } + dstLen++; + lesser = (dstLen < srcLen) ? dstLen : srcLen; } - srcLen = src - srcStart; - dstLen = dst - dstStart; + memmove(dst, src, lesser); + srcLen = src + lesser - srcStart; + dstLen = dst + lesser - dstStart; break; } case TCL_TRANSLATE_AUTO: { -- cgit v0.12 From ea4c5e97e3d2d2751578fa19df54d98988aa46f4 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 10 Mar 2014 19:29:54 +0000 Subject: Test for the bug I just committed. --- tests/io.test | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/io.test b/tests/io.test index e3fff32..1bc3799 100644 --- a/tests/io.test +++ b/tests/io.test @@ -6872,6 +6872,20 @@ test io-52.14.1 {coverage of -translation crlf} { close $out file size $path(test2) } 2 +test io-52.14.2 {coverage of -translation crlf} { + file delete $path(test1) $path(test2) + set out [open $path(test1) wb] + chan configure $out -translation lf + puts -nonewline $out abcdefg\rhijklmn\nopqrstu\r\nvwxyz + close $out + set in [open $path(test1)] + chan configure $in -translation crlf + set out [open $path(test2) w] + fcopy $in $out -size 9 + close $in + close $out + file size $path(test2) +} 9 test io-52.15 {coverage of -translation crlf} { file delete $path(test1) $path(test2) set out [open $path(test1) wb] -- cgit v0.12 From d539d0925f6d60f1334d053247c8f3112e8de938 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 10 Mar 2014 19:30:22 +0000 Subject: .... and then the bug fix. --- generic/tclIO.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 1070f0a..6194637 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5671,7 +5671,7 @@ TranslateInputEOL( srcLen--; src++; } - dstLen++; + dstLen--; lesser = (dstLen < srcLen) ? dstLen : srcLen; } memmove(dst, src, lesser); -- cgit v0.12 From 507194e18d3ee09110002c002daa35eea2b249fd Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 11 Mar 2014 03:38:55 +0000 Subject: Trial rewrite of AUTO input translation. --- generic/tclIO.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/generic/tclIO.c b/generic/tclIO.c index 6194637..8d8e30f 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5680,6 +5680,40 @@ TranslateInputEOL( break; } case TCL_TRANSLATE_AUTO: { +#if 1 + const char *crFound, *src = srcStart; + char *dst = dstStart; + int lesser; + + if ((statePtr->flags & INPUT_SAW_CR) && srcLen) { + if (*src == '\n') { + src++; + srcLen--; + } + ResetFlag(statePtr, INPUT_SAW_CR); + } + lesser = (dstLen < srcLen) ? dstLen : srcLen; + while ((crFound = memchr(src, '\r', lesser))) { + int numBytes = crFound - src; + memmove(dst, src, numBytes); + + dst[numBytes] = '\n'; + dst += numBytes + 1; + dstLen -= numBytes + 1; + src += numBytes + 1; + srcLen -= numBytes + 1; + if (srcLen == 0) { + SetFlag(statePtr, INPUT_SAW_CR); + } else if (*src == '\n') { + src++; + srcLen--; + } + lesser = (dstLen < srcLen) ? dstLen : srcLen; + } + memmove(dst, src, lesser); + srcLen = src + lesser - srcStart; + dstLen = dst + lesser - dstStart; +#else const char *srcEnd = srcStart + srcLen; const char *dstEnd = dstStart + dstLen; const char *src = srcStart; @@ -5706,6 +5740,7 @@ TranslateInputEOL( } srcLen = src - srcStart; dstLen = dst - dstStart; +#endif break; } default: -- cgit v0.12 From 63b89595467a04db5b8a034eab47617e86ab6606 Mon Sep 17 00:00:00 2001 From: dgp Date: Tue, 11 Mar 2014 16:51:38 +0000 Subject: Compress code for better single screen viewing. --- generic/tclIO.c | 55 ++++++++----------------------------------------------- 1 file changed, 8 insertions(+), 47 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 8d8e30f..b4f1c0c 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5653,10 +5653,8 @@ TranslateInputEOL( int numBytes = crFound - src; memmove(dst, src, numBytes); - dst += numBytes; - src += numBytes; - dstLen -= numBytes; - srcLen -= numBytes; + dst += numBytes; dstLen -= numBytes; + src += numBytes; srcLen -= numBytes; if (srcLen == 1) { /* valid src bytes end in \r */ lesser = 0; @@ -5664,12 +5662,10 @@ TranslateInputEOL( } if (src[1] == '\n') { *dst++ = '\n'; - srcLen -= 2; - src += 2; + src += 2; srcLen -= 2; } else { *dst++ = '\r'; - srcLen--; - src++; + src++; srcLen--; } dstLen--; lesser = (dstLen < srcLen) ? dstLen : srcLen; @@ -5680,16 +5676,12 @@ TranslateInputEOL( break; } case TCL_TRANSLATE_AUTO: { -#if 1 const char *crFound, *src = srcStart; char *dst = dstStart; int lesser; if ((statePtr->flags & INPUT_SAW_CR) && srcLen) { - if (*src == '\n') { - src++; - srcLen--; - } + if (*src == '\n') { src++; srcLen--; } ResetFlag(statePtr, INPUT_SAW_CR); } lesser = (dstLen < srcLen) ? dstLen : srcLen; @@ -5698,49 +5690,18 @@ TranslateInputEOL( memmove(dst, src, numBytes); dst[numBytes] = '\n'; - dst += numBytes + 1; - dstLen -= numBytes + 1; - src += numBytes + 1; - srcLen -= numBytes + 1; + dst += numBytes + 1; dstLen -= numBytes + 1; + src += numBytes + 1; srcLen -= numBytes + 1; if (srcLen == 0) { SetFlag(statePtr, INPUT_SAW_CR); } else if (*src == '\n') { - src++; - srcLen--; + src++; srcLen--; } lesser = (dstLen < srcLen) ? dstLen : srcLen; } memmove(dst, src, lesser); srcLen = src + lesser - srcStart; dstLen = dst + lesser - dstStart; -#else - const char *srcEnd = srcStart + srcLen; - const char *dstEnd = dstStart + dstLen; - const char *src = srcStart; - char *dst = dstStart; - - if ((statePtr->flags & INPUT_SAW_CR) && srcLen) { - if (*src == '\n') { - src++; - } - ResetFlag(statePtr, INPUT_SAW_CR); - } - for ( ; dst < dstEnd && src < srcEnd; ) { - if (*src == '\r') { - src++; - if (src == srcEnd) { - SetFlag(statePtr, INPUT_SAW_CR); - } else if (*src == '\n') { - src++; - } - *dst++ = '\n'; - } else { - *dst++ = *src++; - } - } - srcLen = src - srcStart; - dstLen = dst - dstStart; -#endif break; } default: -- cgit v0.12 From b16e407595d059711eecc4f8a0a62a18294edff0 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 17 Mar 2014 17:56:47 +0000 Subject: Remove long dead "BAD_BLOCKING" support code so it no longer confuses people reading/editing this code. --- generic/tclIO.c | 133 +++++--------------------------------------------------- generic/tclIO.h | 23 ---------- 2 files changed, 10 insertions(+), 146 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index b4f1c0c..0f894e4 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4873,42 +4873,18 @@ Tcl_ReadRaw( ResetFlag(statePtr, CHANNEL_BLOCKED); } -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING /* - * [Bug 943274]. Better emulation of non-blocking channels for - * channels without BlockModeProc, by keeping track of true - * fileevents generated by the OS == Data waiting and reading if - * and only if we are sure to have data. + * Now go to the driver to get as much as is possible to + * fill the remaining request. Do all the error handling by + * ourselves. The code was stolen from 'GetInput' and + * slightly adapted (different return value here). + * + * The case of 'bytesToRead == 0' at this point cannot + * happen. */ - if ((statePtr->flags & CHANNEL_NONBLOCKING) && - (Tcl_ChannelBlockModeProc(chanPtr->typePtr) == NULL) && - !(statePtr->flags & CHANNEL_HAS_MORE_DATA)) { - /* - * We bypass the driver; it would block as no data is - * available. - */ - - nread = -1; - result = EWOULDBLOCK; - } else { -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ - - /* - * Now go to the driver to get as much as is possible to fill - * the remaining request. Do all the error handling by - * ourselves. The code was stolen from 'GetInput' and slightly - * adapted (different return value here). - * - * The case of 'bytesToRead == 0' at this point cannot happen. - */ - - nread = ChanRead(chanPtr, bufPtr + copied, - bytesToRead - copied, &result); - -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING - } -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ + nread = ChanRead(chanPtr, bufPtr + copied, + bytesToRead - copied, &result); if (nread > 0) { /* @@ -4921,18 +4897,6 @@ Tcl_ReadRaw( if (nread < (bytesToRead - copied)) { SetFlag(statePtr, CHANNEL_BLOCKED); } - -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING - if (nread <= (bytesToRead - copied)) { - /* - * [Bug 943274] We have read the available data, clear - * flag. - */ - - ResetFlag(statePtr, CHANNEL_HAS_MORE_DATA); - } -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ - } else if (nread == 0) { SetFlag(statePtr, CHANNEL_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; @@ -6041,32 +6005,7 @@ GetInput( return 0; } -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING - /* - * [SF Tcl Bug 943274]. Better emulation of non-blocking channels for - * channels without BlockModeProc, by keeping track of true fileevents - * generated by the OS == Data waiting and reading if and only if we are - * sure to have data. - */ - - if ((statePtr->flags & CHANNEL_NONBLOCKING) && - (Tcl_ChannelBlockModeProc(chanPtr->typePtr) == NULL) && - !(statePtr->flags & CHANNEL_HAS_MORE_DATA)) { - /* - * Bypass the driver, it would block, as no data is available - */ - - nread = -1; - result = EWOULDBLOCK; - } else { -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ - - nread = ChanRead(chanPtr, InsertPoint(bufPtr), toRead, &result); - -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING - } -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ - + nread = ChanRead(chanPtr, InsertPoint(bufPtr), toRead, &result); if (nread > 0) { bufPtr->nextAdded += nread; @@ -6080,18 +6019,6 @@ GetInput( if (nread < toRead) { SetFlag(statePtr, CHANNEL_BLOCKED); } - -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING - if (nread <= toRead) { - /* - * [SF Tcl Bug 943274] We have read the available data, clear - * flag. - */ - - ResetFlag(statePtr, CHANNEL_HAS_MORE_DATA); - } -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ - } else if (nread == 0) { SetFlag(statePtr, CHANNEL_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; @@ -7548,21 +7475,6 @@ Tcl_NotifyChannel( Channel *upChanPtr; const Tcl_ChannelType *upTypePtr; -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING - /* - * [SF Tcl Bug 943274] For a non-blocking channel without blockmodeproc we - * keep track of actual input coming from the OS so that we can do a - * credible imitation of non-blocking behaviour. - */ - - if ((mask & TCL_READABLE) && - (statePtr->flags & CHANNEL_NONBLOCKING) && - (Tcl_ChannelBlockModeProc(chanPtr->typePtr) == NULL) && - !(statePtr->flags & CHANNEL_TIMER_FEV)) { - SetFlag(statePtr, CHANNEL_HAS_MORE_DATA); - } -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ - /* * In contrast to the other API functions this procedure walks towards the * top of a stack and not down from it. @@ -7797,29 +7709,8 @@ ChannelTimerProc( */ statePtr->timer = Tcl_CreateTimerHandler(0, ChannelTimerProc,chanPtr); - -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING - /* - * Set the TIMER flag to notify the higher levels that the driver - * might have no data for us. We do this only if we are in - * non-blocking mode and the driver has no BlockModeProc because only - * then we really don't know if the driver will block or not. A - * similar test is done in "PeekAhead". - */ - - if ((statePtr->flags & CHANNEL_NONBLOCKING) && - (Tcl_ChannelBlockModeProc(chanPtr->typePtr) == NULL)) { - SetFlag(statePtr, CHANNEL_TIMER_FEV); - } -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ - Tcl_Preserve(statePtr); Tcl_NotifyChannel((Tcl_Channel)chanPtr, TCL_READABLE); - -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING - ResetFlag(statePtr, CHANNEL_TIMER_FEV); -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ - Tcl_Release(statePtr); } else { statePtr->timer = NULL; @@ -10381,10 +10272,6 @@ DumpFlags( ChanFlag('/', INPUT_SAW_CR); ChanFlag('D', CHANNEL_DEAD); ChanFlag('R', CHANNEL_RAW_MODE); -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING - ChanFlag('T', CHANNEL_TIMER_FEV); - ChanFlag('H', CHANNEL_HAS_MORE_DATA); -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ ChanFlag('x', CHANNEL_INCLOSE); buf[i] ='\0'; diff --git a/generic/tclIO.h b/generic/tclIO.h index a57d4c5..59754cf 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -271,29 +271,6 @@ typedef struct ChannelState { * changes. */ #define CHANNEL_RAW_MODE (1<<16) /* When set, notes that the Raw API is * being used. */ -#ifdef TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING -#define CHANNEL_TIMER_FEV (1<<17) /* When set the event we are notified - * by is a fileevent generated by a - * timer. We don't know if the driver - * has more data and should not try to - * read from it. If the system needs - * more than is in the buffers out - * read routines will simulate a short - * read (0 characters read) */ -#define CHANNEL_HAS_MORE_DATA (1<<18) /* Set by NotifyChannel for a channel - * if and only if the channel is - * configured non-blocking, the driver - * for said channel has no - * blockmodeproc, and data has arrived - * for reading at the OS level). A - * GetInput will pass reading from the - * driver if the channel is - * non-blocking, without blockmode - * proc and the flag has not been set. - * A read will be performed if the - * flag is set. This will reset the - * flag as well. */ -#endif /* TCL_IO_TRACK_OS_FOR_DRIVER_WITH_BAD_BLOCKING */ #define CHANNEL_INCLOSE (1<<19) /* Channel is currently being closed. * Its structures are still live and -- cgit v0.12 From 69b5edf8708c05f4abdb039c64ea28932478b400 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 19 Mar 2014 20:32:37 +0000 Subject: Complete rewrite of DoRead(). --- generic/tclIO.c | 288 ++++++++++++++++++++++++++------------------------------ 1 file changed, 132 insertions(+), 156 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 0f894e4..2d22942 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -11,6 +11,7 @@ * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ +#undef NDEBUG #include "tclInt.h" #include "tclIO.h" #include @@ -173,8 +174,6 @@ static void CleanupChannelHandlers(Tcl_Interp *interp, static int CloseChannel(Tcl_Interp *interp, Channel *chanPtr, int errorCode); static void CommonGetsCleanup(Channel *chanPtr); -static int CopyAndTranslateBuffer(ChannelState *statePtr, - char *result, int space); static int CopyBuffer(Channel *chanPtr, char *result, int space); static int CopyData(CopyState *csPtr, int mask); static void CopyEventProc(ClientData clientData, int mask); @@ -188,7 +187,7 @@ static int DetachChannel(Tcl_Interp *interp, Tcl_Channel chan); static void DiscardInputQueued(ChannelState *statePtr, int discardSavedBuffers); static void DiscardOutputQueued(ChannelState *chanPtr); -static int DoRead(Channel *chanPtr, char *srcPtr, int slen); +static int DoRead(Channel *chanPtr, char *dst, int bytesToRead); static int DoReadChars(Channel *chan, Tcl_Obj *objPtr, int toRead, int appendFlag); static int FilterInputBytes(Channel *chanPtr, @@ -5363,7 +5362,7 @@ ReadChars( * record \r or \n yet. */ - assert(dstRead + 1 == dstDecoded); +// assert(dstRead + 1 == dstDecoded); assert(dst[dstRead] == '\r'); assert(statePtr->inputTranslation == TCL_TRANSLATE_CRLF); @@ -5384,7 +5383,7 @@ ReadChars( assert(dstWrote == 0); assert(dstRead == 0); - assert(dstDecoded == 1); +// assert(dstDecoded == 1); /* * We decoded only the bare cr, and we cannot read a @@ -5882,6 +5881,9 @@ DiscardInputQueued( * * Reads input data from a device into a channel buffer. * + * IMPORTANT! This routine is only called on a chanPtr argument + * that is the top channel of a stack! + * * Results: * The return value is the Posix error code if an error occurred while * reading from the file, or 0 otherwise. @@ -8633,13 +8635,24 @@ CopyData( * * DoRead -- * - * Reads a given number of bytes from a channel. + * Stores up to "bytesToRead" bytes in memory pointed to by "dst". + * These bytes come from reading the channel "chanPtr" and + * performing the configured translations. * * No encoding conversions are applied to the bytes being read. * * Results: - * The number of characters read, or -1 on error. Use Tcl_GetErrno() to - * retrieve the error code for the error that occurred. + * The number of bytes actually stored (<= bytesToRead), + * or -1 if there is an error in reading the channel. Use + * Tcl_GetErrno() to retrieve the error code for the error + * that occurred. + * + * The number of bytes stored can be less than the number + * requested when + * - EOF is reached on the channel; or + * - the channel is non-blocking, and we've read all we can + * without blocking. + * - a channel reading error occurs (and we return -1) * * Side effects: * May cause input to be buffered. @@ -8650,186 +8663,149 @@ CopyData( static int DoRead( Channel *chanPtr, /* The channel from which to read. */ - char *bufPtr, /* Where to store input read. */ - int toRead) /* Maximum number of bytes to read. */ + char *dst, /* Where to store input read. */ + int bytesToRead) /* Maximum number of bytes to read. */ { ChannelState *statePtr = chanPtr->state; - /* State info for channel */ - int copied; /* How many characters were copied into the - * result string? */ - int copiedNow; /* How many characters were copied from the - * current input buffer? */ - int result; /* Of calling GetInput. */ + char *p = dst; - /* - * If we have not encountered a sticky EOF, clear the EOF bit. Either way - * clear the BLOCKED bit. We want to discover these anew during each - * operation. - */ + while (bytesToRead) { + /* + * Each pass through the loop is intended to process up to + * one channel buffer. + * + * First, if there is no full buffer, we attempt to + * create and/or fill one. + */ - if (!(statePtr->flags & CHANNEL_STICKY_EOF)) { - ResetFlag(statePtr, CHANNEL_EOF); - } - ResetFlag(statePtr, CHANNEL_BLOCKED | CHANNEL_NEED_MORE_DATA); + ChannelBuffer *bufPtr = statePtr->inQueueHead; - for (copied = 0; copied < toRead; copied += copiedNow) { - copiedNow = CopyAndTranslateBuffer(statePtr, bufPtr + copied, - toRead - copied); - if (copiedNow == 0) { - if (statePtr->flags & CHANNEL_EOF) { - goto done; - } - if (statePtr->flags & CHANNEL_BLOCKED) { - if (statePtr->flags & CHANNEL_NONBLOCKING) { - goto done; - } - ResetFlag(statePtr, CHANNEL_BLOCKED); + if (statePtr->flags & CHANNEL_EOF + && (bufPtr == NULL || IsBufferEmpty(bufPtr))) { + break; + } + + while (bufPtr == NULL || !IsBufferFull(bufPtr)) { + int code; + + ResetFlag(statePtr, CHANNEL_BLOCKED); + moreData: + code = GetInput(chanPtr); + bufPtr = statePtr->inQueueHead; + if (statePtr->flags & (CHANNEL_EOF|CHANNEL_BLOCKED)) { + /* Further reads cannot do any more */ + break; } - result = GetInput(chanPtr); - if (result != 0) { - if (result != EAGAIN) { - copied = -1; - } - goto done; + + if (code) { + /* Read error */ + UpdateInterest(chanPtr); + return -1; } } - } - ResetFlag(statePtr, CHANNEL_BLOCKED); + /* Here we know bufPtr != NULL */ + int bytesRead = BytesLeft(bufPtr); + int bytesWritten = bytesToRead; - /* - * Update the notifier state so we don't block while there is still data - * in the buffers. - */ + if (bytesRead == 0 && statePtr->flags & CHANNEL_NONBLOCKING + && statePtr->flags & CHANNEL_BLOCKED) { + break; + } - done: - UpdateInterest(chanPtr); - return copied; -} - -/* - *---------------------------------------------------------------------- - * - * CopyAndTranslateBuffer -- - * - * Copy at most one buffer of input to the result space, doing eol - * translations according to mode in effect currently. - * - * Results: - * Number of bytes stored in the result buffer (as opposed to the number - * of bytes read from the channel). May return zero if no input is - * available to be translated. - * - * Side effects: - * Consumes buffered input. May deallocate one buffer. - * - *---------------------------------------------------------------------- - */ + TranslateInputEOL(statePtr, p, RemovePoint(bufPtr), + &bytesWritten, &bytesRead); + bufPtr->nextRemoved += bytesRead; + p += bytesWritten; + bytesToRead -= bytesWritten; -static int -CopyAndTranslateBuffer( - ChannelState *statePtr, /* Channel state from which to read input. */ - char *result, /* Where to store the copied input. */ - int space) /* How many bytes are available in result to - * store the copied input? */ -{ - ChannelBuffer *bufPtr; /* The buffer from which to copy bytes. */ - int bytesInBuffer; /* How many bytes are available to be copied - * in the current input buffer? */ - int copied; /* How many characters were already copied - * into the destination space? */ + if (!IsBufferEmpty(bufPtr)) { + /* + * Buffer is not empty. How can that be? + * + * 0) We stopped early because we got all the bytes + * we were seeking. That's fine. + */ - /* - * If there is no input at all, return zero. The invariant is that either - * there is no buffer in the queue, or if the first buffer is empty, it is - * also the last buffer (and thus there is no input in the queue). Note - * also that if the buffer is empty, we leave it in the queue. - */ + if (bytesToRead == 0) { + UpdateInterest(chanPtr); + break; + } - if (statePtr->inQueueHead == NULL) { - return 0; - } - bufPtr = statePtr->inQueueHead; - bytesInBuffer = BytesLeft(bufPtr); - if (bytesInBuffer == 0) { - return 0; - } + /* + * 1) We're @EOF because we saw eof char. + */ - copied = space; - TranslateInputEOL(statePtr, result, RemovePoint(bufPtr), - &copied, &bytesInBuffer); - bufPtr->nextRemoved += bytesInBuffer; + if (statePtr->inEofChar + && RemovePoint(bufPtr)[0] == statePtr->inEofChar) { + UpdateInterest(chanPtr); + break; + } - /* - * If the current buffer is empty recycle it. - */ + /* + * 2) The buffer holds a \r while in CRLF translation, followed + * by either the end of the buffer, or the eof char. + */ - if (IsBufferEmpty(bufPtr)) { - statePtr->inQueueHead = bufPtr->nextPtr; - if (statePtr->inQueueHead == NULL) { - statePtr->inQueueTail = NULL; - } - RecycleBuffer(statePtr, bufPtr, 0); - } else { + assert(statePtr->inputTranslation == TCL_TRANSLATE_CRLF); + assert(RemovePoint(bufPtr)[0] == '\r'); - if (copied > 0) { - return copied; - } + if (BytesLeft(bufPtr) > 1) { - if (statePtr->inEofChar - && RemovePoint(bufPtr)[0] == statePtr->inEofChar) { - return 0; - } + /* TODO: shift this to TIEOL */ + assert(statePtr->inEofChar); + assert(RemovePoint(bufPtr)[1] == statePtr->inEofChar); - if (BytesLeft(bufPtr) == 1) { + bufPtr->nextRemoved++; + *p++ = '\r'; + bytesToRead--; + UpdateInterest(chanPtr); + break; + } - ChannelBuffer *nextPtr = bufPtr->nextPtr; + assert(BytesLeft(bufPtr) == 1); - if (nextPtr == NULL) { + if (bufPtr->nextPtr == NULL) { + /* There's no more buffered data.... */ if (statePtr->flags & CHANNEL_EOF) { - *result = '\r'; - bufPtr->nextRemoved += 1; - return 1; - } + /* ...and there never will be. */ - SetFlag(statePtr, CHANNEL_NEED_MORE_DATA); - return 0; + *p++ = '\r'; + bytesToRead--; + bufPtr->nextRemoved++; + } else if (statePtr->flags & CHANNEL_BLOCKED) { + /* ...and we cannot get more now. */ + SetFlag(statePtr, CHANNEL_NEED_MORE_DATA); + UpdateInterest(chanPtr); + break; + } else { + /* ... so we need to get some. */ + goto moreData; + } } - nextPtr->nextRemoved -= 1; - memcpy(RemovePoint(nextPtr), RemovePoint(bufPtr), 1); - RecycleBuffer(statePtr, bufPtr, 0); - statePtr->inQueueHead = nextPtr; - return 0; - } + if (bufPtr->nextPtr) { + /* There's a next buffer. Shift orphan \r to it. */ - if (statePtr->inEofChar - && RemovePoint(bufPtr)[1] == statePtr->inEofChar) { - *result = '\r'; - bufPtr->nextRemoved += 1; - return 1; + ChannelBuffer *nextPtr = bufPtr->nextPtr; + + nextPtr->nextRemoved -= 1; + RemovePoint(nextPtr)[0] = '\r'; + bufPtr->nextRemoved++; + } } - /* - * Buffer is not empty. How can that be? - * 0) We stopped early due to the value of "space". - * => copied > 0 and all is fine. - * 1) We saw eof char and stopped the translation copy. - * => if (copied > 0) or ((copied == 0) and @ eof char), - * return is fine. - * 2) The buffer holds a \r while in CRLF translation, followed - * by either the end of the buffer, or the eof char. - */ + if (IsBufferEmpty(bufPtr)) { + statePtr->inQueueHead = bufPtr->nextPtr; + if (statePtr->inQueueHead == NULL) { + statePtr->inQueueTail = NULL; + } + RecycleBuffer(statePtr, bufPtr, 0); + } } - /* - * Return the number of characters copied into the result buffer. This may - * be different from the number of bytes consumed, because of EOL - * translations. - */ - - return copied; + return (int)(p - dst); } /* -- cgit v0.12 From a174107efac416856e4183ea90821edac8c266b2 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 19 Mar 2014 21:43:31 +0000 Subject: Let TranslateInputEOL handle the "\r$eofChar" sequence in CRLF mode. --- generic/tclIO.c | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 2d22942..267b659 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5362,7 +5362,7 @@ ReadChars( * record \r or \n yet. */ -// assert(dstRead + 1 == dstDecoded); + assert(dstRead + 1 == dstDecoded); assert(dst[dstRead] == '\r'); assert(statePtr->inputTranslation == TCL_TRANSLATE_CRLF); @@ -5383,7 +5383,7 @@ ReadChars( assert(dstWrote == 0); assert(dstRead == 0); -// assert(dstDecoded == 1); + assert(dstDecoded == 1); /* * We decoded only the bare cr, and we cannot read a @@ -5620,10 +5620,14 @@ TranslateInputEOL( src += numBytes; srcLen -= numBytes; if (srcLen == 1) { /* valid src bytes end in \r */ - lesser = 0; - break; - } - if (src[1] == '\n') { + if (eof) { + *dst++ = '\r'; + src++; srcLen--; + } else { + lesser = 0; + break; + } + } else if (src[1] == '\n') { *dst++ = '\n'; src += 2; srcLen -= 2; } else { @@ -8708,11 +8712,6 @@ DoRead( int bytesRead = BytesLeft(bufPtr); int bytesWritten = bytesToRead; - if (bytesRead == 0 && statePtr->flags & CHANNEL_NONBLOCKING - && statePtr->flags & CHANNEL_BLOCKED) { - break; - } - TranslateInputEOL(statePtr, p, RemovePoint(bufPtr), &bytesWritten, &bytesRead); bufPtr->nextRemoved += bytesRead; @@ -8743,26 +8742,12 @@ DoRead( } /* - * 2) The buffer holds a \r while in CRLF translation, followed - * by either the end of the buffer, or the eof char. + * 2) The buffer holds a \r while in CRLF translation, + * followed by the end of the buffer. */ assert(statePtr->inputTranslation == TCL_TRANSLATE_CRLF); assert(RemovePoint(bufPtr)[0] == '\r'); - - if (BytesLeft(bufPtr) > 1) { - - /* TODO: shift this to TIEOL */ - assert(statePtr->inEofChar); - assert(RemovePoint(bufPtr)[1] == statePtr->inEofChar); - - bufPtr->nextRemoved++; - *p++ = '\r'; - bytesToRead--; - UpdateInterest(chanPtr); - break; - } - assert(BytesLeft(bufPtr) == 1); if (bufPtr->nextPtr == NULL) { @@ -8803,6 +8788,11 @@ DoRead( } RecycleBuffer(statePtr, bufPtr, 0); } + + if (statePtr->flags & CHANNEL_NONBLOCKING + && statePtr->flags & CHANNEL_BLOCKED) { + break; + } } return (int)(p - dst); -- cgit v0.12 From eae6f97866efd02f09d961bf695047a3b47ac961 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 20 Mar 2014 16:31:11 +0000 Subject: Use assertions about the pushback buffers to simplify their handling. Mark several things left TODO. Some tidying. --- generic/tclIO.c | 60 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 267b659..b423bcc 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -6,6 +6,7 @@ * * Copyright (c) 1998-2000 Ajuba Solutions * Copyright (c) 1995-1997 Sun Microsystems, Inc. + * Contributions from Don Porter, NIST, 2014. (not subject to US copyright) * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -1679,17 +1680,17 @@ Tcl_StackChannel( */ if (((mask & TCL_READABLE) != 0) && (statePtr->inQueueHead != NULL)) { + /* - * Remark: It is possible that the channel buffers contain data from - * some earlier push-backs. + * When statePtr->inQueueHead is not NULL, we know + * prevChanPtr->inQueueHead must be NULL. */ - statePtr->inQueueTail->nextPtr = prevChanPtr->inQueueHead; - prevChanPtr->inQueueHead = statePtr->inQueueHead; + assert(prevChanPtr->inQueueHead == NULL); + assert(prevChanPtr->inQueueTail == NULL); - if (prevChanPtr->inQueueTail == NULL) { - prevChanPtr->inQueueTail = statePtr->inQueueTail; - } + prevChanPtr->inQueueHead = statePtr->inQueueHead; + prevChanPtr->inQueueTail = statePtr->inQueueTail; statePtr->inQueueHead = NULL; statePtr->inQueueTail = NULL; @@ -2254,6 +2255,7 @@ RecycleBuffer( } /* + * TODO * Only save buffers which are at least as big as the requested buffersize * for the channel. This is to honor dynamic changes of the buffersize * made by the user. @@ -3696,9 +3698,7 @@ Write( &statePtr->outputEncodingState, dst, dstLen + BUFFER_PADDING, &srcRead, &dstWrote, NULL); - if (srcRead != nlLen) { - Tcl_Panic("Can This Happen?"); - } + assert (srcRead == nlLen); bufPtr->nextAdded += dstWrote; src++; @@ -4837,6 +4837,7 @@ Tcl_ReadRaw( int nread, result, copied, copiedNow; /* + * TODO VERIFY * The check below does too much because it will reject a call to this * function with a channel which is part of an 'fcopy'. But we have to * allow this here or else the chaining in the transformation drivers will @@ -5925,16 +5926,11 @@ GetInput( * channel in the stack and use them. They can be the result of a * transformation which went away without reading all the information * placed in the area when it was stacked. - * - * Two possibilities for the state: No buffers in it, or a single empty - * buffer. In the latter case we can recycle it now. */ if (chanPtr->inQueueHead != NULL) { - if (statePtr->inQueueHead != NULL) { - RecycleBuffer(statePtr, statePtr->inQueueHead, 0); - statePtr->inQueueHead = NULL; - } + + assert(statePtr->inQueueHead == NULL); statePtr->inQueueHead = chanPtr->inQueueHead; statePtr->inQueueTail = chanPtr->inQueueTail; @@ -5962,6 +5958,7 @@ GetInput( statePtr->saveInBufPtr = NULL; /* + * TODO * Check the actual buffersize against the requested buffersize. * Buffers which are smaller than requested are squashed. This is done * to honor dynamic changes of the buffersize made by the user. @@ -5979,6 +5976,7 @@ GetInput( bufPtr->nextPtr = NULL; /* + * TODO * SF #427196: Use the actual size of the buffer to determine the * number of bytes to read from the channel and not the size for new * buffers. They can be different if the buffersize was changed @@ -6003,6 +6001,7 @@ GetInput( } /* + * TODO * If EOF is set, we should avoid calling the driver because on some * platforms it is impossible to read from a device after EOF. */ @@ -6524,6 +6523,7 @@ CheckChannelErrors( if (direction == TCL_READABLE) { /* + * TODO * If we have not encountered a sticky EOF, clear the EOF bit (sticky * EOF is set if we have seen the input eofChar, to prevent reading * beyond the eofChar). Also, always clear the BLOCKED bit. We want to @@ -6739,6 +6739,7 @@ Tcl_SetChannelBufferSize( ChannelState *statePtr; /* State of real channel structure. */ /* + * TODO * Clip the buffer size to force it into the [1,1M] range */ @@ -7375,6 +7376,7 @@ Tcl_SetChannelOption( } /* + * TODO * If bufsize changes, need to get rid of old utility buffer. */ @@ -8677,18 +8679,23 @@ DoRead( /* * Each pass through the loop is intended to process up to * one channel buffer. - * - * First, if there is no full buffer, we attempt to - * create and/or fill one. */ + int bytesRead, bytesWritten; ChannelBuffer *bufPtr = statePtr->inQueueHead; + /* + * When there's no buffered data to read, and we're at EOF, + * escape to the caller. + */ + if (statePtr->flags & CHANNEL_EOF && (bufPtr == NULL || IsBufferEmpty(bufPtr))) { break; } + /* If there is no full buffer, attempt to create and/or fill one. */ + while (bufPtr == NULL || !IsBufferFull(bufPtr)) { int code; @@ -8696,6 +8703,9 @@ DoRead( moreData: code = GetInput(chanPtr); bufPtr = statePtr->inQueueHead; + + assert (bufPtr != NULL); + if (statePtr->flags & (CHANNEL_EOF|CHANNEL_BLOCKED)) { /* Further reads cannot do any more */ break; @@ -8706,11 +8716,14 @@ DoRead( UpdateInterest(chanPtr); return -1; } + + assert (IsBufferFull(bufPtr)); } - /* Here we know bufPtr != NULL */ - int bytesRead = BytesLeft(bufPtr); - int bytesWritten = bytesToRead; + assert (bufPtr != NULL); + + bytesRead = BytesLeft(bufPtr); + bytesWritten = bytesToRead; TranslateInputEOL(statePtr, p, RemovePoint(bufPtr), &bytesWritten, &bytesRead); @@ -10155,6 +10168,7 @@ SetChannelFromAny( } if (objPtr->typePtr == &chanObjType) { /* + * TODO: TAINT Flag and dup'd channel values? * The channel is valid until any call to DetachChannel occurs. * Ensure consistency checks are done. */ -- cgit v0.12 From 8c3da02c3e41f1b7e029ed7633e250646fe7ec82 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 20 Mar 2014 19:25:11 +0000 Subject: Stop routine clearing of CHANNEL_EOF. Only clear when there's a reason (seek, eofchar change, ungets). Otherwise, once you hit EOF you stay there. --- generic/tclIO.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index b423bcc..1a56811 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4837,7 +4837,6 @@ Tcl_ReadRaw( int nread, result, copied, copiedNow; /* - * TODO VERIFY * The check below does too much because it will reject a call to this * function with a channel which is part of an 'fcopy'. But we have to * allow this here or else the chaining in the transformation drivers will @@ -5742,16 +5741,11 @@ Tcl_Ungets( statePtr->flags = flags; /* - * If we have encountered a sticky EOF, just punt without storing (sticky - * EOF is set if we have seen the input eofChar, to prevent reading beyond - * the eofChar). Otherwise, clear the EOF flags, and clear the BLOCKED - * bit. We want to discover these conditions anew in each operation. + * Clear the EOF flags, and clear the BLOCKED bit. */ - if (statePtr->flags & CHANNEL_STICKY_EOF) { - goto done; - } - ResetFlag(statePtr, CHANNEL_BLOCKED | CHANNEL_EOF); + ResetFlag(statePtr, + CHANNEL_BLOCKED | CHANNEL_STICKY_EOF | CHANNEL_EOF | INPUT_SAW_CR); bufPtr = AllocChannelBuffer(len); memcpy(InsertPoint(bufPtr), str, (size_t) len); @@ -6001,7 +5995,7 @@ GetInput( } /* - * TODO + * TODO - consider escape before buffer alloc * If EOF is set, we should avoid calling the driver because on some * platforms it is impossible to read from a device after EOF. */ @@ -6523,16 +6517,10 @@ CheckChannelErrors( if (direction == TCL_READABLE) { /* - * TODO - * If we have not encountered a sticky EOF, clear the EOF bit (sticky - * EOF is set if we have seen the input eofChar, to prevent reading - * beyond the eofChar). Also, always clear the BLOCKED bit. We want to - * discover these conditions anew in each operation. + * Clear the BLOCKED bit. We want to discover this condition + * anew in each operation. */ - if ((statePtr->flags & CHANNEL_STICKY_EOF) == 0) { - ResetFlag(statePtr, CHANNEL_EOF); - } ResetFlag(statePtr, CHANNEL_BLOCKED | CHANNEL_NEED_MORE_DATA); } -- cgit v0.12 From 60a84571795909d2b51dff06349107716ae3ab6d Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 20 Mar 2014 20:18:20 +0000 Subject: Don't allow buffer recycling to prevent or delay buffersize shrinkage. --- generic/tclIO.c | 68 ++++++++++++++++++++++++--------------------------------- 1 file changed, 28 insertions(+), 40 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 1a56811..e7653f6 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -2255,13 +2255,12 @@ RecycleBuffer( } /* - * TODO - * Only save buffers which are at least as big as the requested buffersize - * for the channel. This is to honor dynamic changes of the buffersize + * Only save buffers which have the requested buffersize for the + * channel. This is to honor dynamic changes of the buffersize * made by the user. */ - if ((bufPtr->bufLength - BUFFER_PADDING) < statePtr->bufSize) { + if ((bufPtr->bufLength - BUFFER_PADDING) != statePtr->bufSize) { ckfree((char *) bufPtr); return; } @@ -5952,14 +5951,13 @@ GetInput( statePtr->saveInBufPtr = NULL; /* - * TODO * Check the actual buffersize against the requested buffersize. - * Buffers which are smaller than requested are squashed. This is done + * Saved buffers of the wrong size are squashed. This is done * to honor dynamic changes of the buffersize made by the user. */ if ((bufPtr != NULL) - && (bufPtr->bufLength - BUFFER_PADDING < statePtr->bufSize)) { + && (bufPtr->bufLength - BUFFER_PADDING != statePtr->bufSize)) { ckfree((char *) bufPtr); bufPtr = NULL; } @@ -5969,22 +5967,8 @@ GetInput( } bufPtr->nextPtr = NULL; - /* - * TODO - * SF #427196: Use the actual size of the buffer to determine the - * number of bytes to read from the channel and not the size for new - * buffers. They can be different if the buffersize was changed - * between reads. - * - * Note: This affects performance negatively if the buffersize was - * extended but this small buffer is reused for all subsequent reads. - * The system never uses buffers with the requested bigger size in - * that case. An adjunct patch could try and delete all unused buffers - * it encounters and which are smaller than the formally requested - * buffersize. - */ - toRead = SpaceLeft(bufPtr); + assert(toRead == statePtr->bufSize); if (statePtr->inQueueTail == NULL) { statePtr->inQueueHead = bufPtr; @@ -6727,7 +6711,6 @@ Tcl_SetChannelBufferSize( ChannelState *statePtr; /* State of real channel structure. */ /* - * TODO * Clip the buffer size to force it into the [1,1M] range */ @@ -6738,7 +6721,27 @@ Tcl_SetChannelBufferSize( } statePtr = ((Channel *) chan)->state; + + if (statePtr->bufSize == sz) { + return; + } statePtr->bufSize = sz; + + /* + * If bufsize changes, need to get rid of old utility buffer. + */ + + if (statePtr->saveInBufPtr != NULL) { + RecycleBuffer(statePtr, statePtr->saveInBufPtr, 1); + statePtr->saveInBufPtr = NULL; + } + if ((statePtr->inQueueHead != NULL) + && (statePtr->inQueueHead->nextPtr == NULL) + && IsBufferEmpty(statePtr->inQueueHead)) { + RecycleBuffer(statePtr, statePtr->inQueueHead, 1); + statePtr->inQueueHead = NULL; + statePtr->inQueueTail = NULL; + } } /* @@ -7172,6 +7175,7 @@ Tcl_SetChannelOption( return TCL_ERROR; } Tcl_SetChannelBufferSize(chan, newBufferSize); + return TCL_OK; } else if (HaveOpt(2, "-encoding")) { Tcl_Encoding encoding; @@ -7202,6 +7206,7 @@ Tcl_SetChannelOption( statePtr->outputEncodingFlags = TCL_ENCODING_START; ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA); UpdateInterest(chanPtr); + return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { if (Tcl_SplitList(interp, newValue, &argc, &argv) == TCL_ERROR) { return TCL_ERROR; @@ -7363,23 +7368,6 @@ Tcl_SetChannelOption( return Tcl_BadChannelOption(interp, optionName, NULL); } - /* - * TODO - * If bufsize changes, need to get rid of old utility buffer. - */ - - if (statePtr->saveInBufPtr != NULL) { - RecycleBuffer(statePtr, statePtr->saveInBufPtr, 1); - statePtr->saveInBufPtr = NULL; - } - if ((statePtr->inQueueHead != NULL) - && (statePtr->inQueueHead->nextPtr == NULL) - && IsBufferEmpty(statePtr->inQueueHead)) { - RecycleBuffer(statePtr, statePtr->inQueueHead, 1); - statePtr->inQueueHead = NULL; - statePtr->inQueueTail = NULL; - } - return TCL_OK; } -- cgit v0.12