summaryrefslogtreecommitdiffstats
path: root/generic/tclIO.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclIO.c')
-rw-r--r--generic/tclIO.c289
1 files changed, 272 insertions, 17 deletions
diff --git a/generic/tclIO.c b/generic/tclIO.c
index c9822e9..c47eaea 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclIO.c,v 1.121.2.3 2007/09/17 15:03:44 dgp Exp $
+ * RCS: @(#) $Id: tclIO.c,v 1.121.2.4 2007/11/12 19:18:17 dgp Exp $
*/
#include "tclInt.h"
@@ -87,6 +87,7 @@ static int FilterInputBytes(Channel *chanPtr,
GetsState *statePtr);
static int FlushChannel(Tcl_Interp *interp, Channel *chanPtr,
int calledFromAsyncFlush);
+static int TclGetsObjBinary(Tcl_Channel chan, Tcl_Obj *objPtr);
static void FreeBinaryEncoding(ClientData clientData);
static Tcl_HashTable * GetChannelTable(Tcl_Interp *interp);
static int GetInput(Channel *chanPtr);
@@ -3231,15 +3232,20 @@ DoWriteChars(
/*
* Inefficient way to convert UTF-8 to byte-array, but the code
* parallels the way it is done for objects.
+ * Special case for 1-byte (used by eg [puts] for the \n) could
+ * be extended to more efficient translation of the src string.
*/
- Tcl_Obj *objPtr;
- int result;
+ int result;
- objPtr = Tcl_NewStringObj(src, len);
- src = (char *) Tcl_GetByteArrayFromObj(objPtr, &len);
- result = WriteBytes(chanPtr, src, len);
- TclDecrRefCount(objPtr);
+ if ((len == 1) && (UCHAR(*src) < 0xC0)) {
+ result = WriteBytes(chanPtr, src, len);
+ } else {
+ Tcl_Obj *objPtr = Tcl_NewStringObj(src, len);
+ src = (char *) Tcl_GetByteArrayFromObj(objPtr, &len);
+ result = WriteBytes(chanPtr, src, len);
+ TclDecrRefCount(objPtr);
+ }
return result;
}
return WriteChars(chanPtr, src, len);
@@ -3294,7 +3300,7 @@ Tcl_WriteObj(
src = (char *) Tcl_GetByteArrayFromObj(objPtr, &srcLen);
return WriteBytes(chanPtr, src, srcLen);
} else {
- src = Tcl_GetStringFromObj(objPtr, &srcLen);
+ src = TclGetStringFromObj(objPtr, &srcLen);
return WriteChars(chanPtr, src, srcLen);
}
}
@@ -3809,7 +3815,7 @@ Tcl_Gets(
TclNewObj(objPtr);
charsStored = Tcl_GetsObj(chan, objPtr);
if (charsStored > 0) {
- string = Tcl_GetStringFromObj(objPtr, &length);
+ string = TclGetStringFromObj(objPtr, &length);
Tcl_DStringAppend(lineRead, string, length);
}
TclDecrRefCount(objPtr);
@@ -3866,6 +3872,18 @@ Tcl_GetsObj(
goto done;
}
+ /*
+ * A binary version of Tcl_GetsObj. This could also handle encodings
+ * that are ascii-7 pure (iso8859, utf-8, ...) with a final encoding
+ * conversion done on objPtr.
+ */
+
+ if ((statePtr->encoding == NULL)
+ && ((statePtr->inputTranslation == TCL_TRANSLATE_LF)
+ || (statePtr->inputTranslation == TCL_TRANSLATE_CR))) {
+ return TclGetsObjBinary(chan, objPtr);
+ }
+
bufPtr = statePtr->inQueueHead;
encoding = statePtr->encoding;
@@ -3874,7 +3892,7 @@ Tcl_GetsObj(
* newline in the available input.
*/
- Tcl_GetStringFromObj(objPtr, &oldLength);
+ TclGetStringFromObj(objPtr, &oldLength);
oldFlags = statePtr->inputEncodingFlags;
oldState = statePtr->inputEncodingState;
oldRemoved = BUFFER_PADDING;
@@ -4171,6 +4189,244 @@ Tcl_GetsObj(
/*
*---------------------------------------------------------------------------
*
+ * TclGetsObjBinary --
+ *
+ * A variation of Tcl_GetsObj that works directly on the buffers until
+ * end-of-line or end-of-file has been seen. Bytes read from the input
+ * channel return as a ByteArray obj.
+ *
+ * Results:
+ * Number of characters accumulated in the object or -1 if error,
+ * blocked, or EOF. If -1, use Tcl_GetErrno() to retrieve the POSIX error
+ * code for the error or condition that occurred.
+ *
+ * Side effects:
+ * Consumes input from the channel.
+ *
+ * On reading EOF, leave channel pointing at EOF char. On reading EOL,
+ * leave channel pointing after EOL, but don't return EOL in dst buffer.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+static int
+TclGetsObjBinary(
+ Tcl_Channel chan, /* Channel from which to read. */
+ Tcl_Obj *objPtr) /* The line read will be appended to this
+ * object as UTF-8 characters. */
+{
+ Channel *chanPtr = (Channel *) chan;
+ ChannelState *statePtr = chanPtr->state;
+ /* State info for channel */
+ ChannelBuffer *bufPtr;
+ int inEofChar, skip, copiedTotal, oldLength, oldFlags, oldRemoved;
+ int rawLen, byteLen, eolChar;
+ unsigned char *dst, *dstEnd, *eol, *eof, *byteArray;
+
+ bufPtr = statePtr->inQueueHead;
+
+ /*
+ * Preserved so we can restore the channel's state in case we don't find a
+ * newline in the available input.
+ */
+
+ byteArray = Tcl_GetByteArrayFromObj(objPtr, &byteLen);
+ oldFlags = statePtr->inputEncodingFlags;
+ oldRemoved = BUFFER_PADDING;
+ oldLength = byteLen;
+ if (bufPtr != NULL) {
+ oldRemoved = bufPtr->nextRemoved;
+ }
+
+ rawLen = 0;
+ skip = 0;
+ eof = NULL;
+ inEofChar = statePtr->inEofChar;
+ /* Only handle TCL_TRANSLATE_LF and TCL_TRANSLATE_CR */
+ eolChar = (statePtr->inputTranslation == TCL_TRANSLATE_LF) ? '\n' : '\r';
+
+ while (1) {
+ /*
+ * Subtract the number of bytes that were removed from channel
+ * buffer during last call.
+ */
+
+ if (bufPtr != NULL) {
+ bufPtr->nextRemoved += rawLen;
+ if (!IsBufferReady(bufPtr)) {
+ bufPtr = bufPtr->nextPtr;
+ }
+ }
+
+ if ((bufPtr == NULL) || (bufPtr->nextAdded == BUFFER_PADDING)) {
+ /*
+ * All channel buffers were exhausted and the caller still
+ * hasn't seen EOL. Need to read more bytes from the channel
+ * device. Side effect is to allocate another channel buffer.
+ */
+
+ if (statePtr->flags & CHANNEL_BLOCKED) {
+ if (statePtr->flags & CHANNEL_NONBLOCKING) {
+ goto restore;
+ }
+ statePtr->flags &= ~CHANNEL_BLOCKED;
+ }
+ if (GetInput(chanPtr) != 0) {
+ goto restore;
+ }
+ bufPtr = statePtr->inQueueTail;
+ }
+
+ dst = (unsigned char*) RemovePoint(bufPtr);
+ dstEnd = dst + BytesLeft(bufPtr);
+
+ /*
+ * Remember if EOF char is seen, then look for EOL anyhow, because the
+ * EOL might be before the EOF char.
+ * XXX - in the binary case, consider coincident search for eol/eof.
+ */
+
+ if (inEofChar != '\0') {
+ for (eol = dst; eol < dstEnd; eol++) {
+ if (*eol == inEofChar) {
+ dstEnd = eol;
+ eof = eol;
+ break;
+ }
+ }
+ }
+
+ /*
+ * On EOL, leave current file position pointing after the EOL, but
+ * don't store the EOL in the output string.
+ */
+
+ for (eol = dst; eol < dstEnd; eol++) {
+ if (*eol == eolChar) {
+ skip = 1;
+ goto gotEOL;
+ }
+ }
+ if (eof != NULL) {
+ /*
+ * EOF character was seen. On EOF, leave current file position
+ * pointing at the EOF character, but don't store the EOF
+ * character in the output string.
+ */
+
+ statePtr->flags |= CHANNEL_EOF | CHANNEL_STICKY_EOF;
+ statePtr->inputEncodingFlags |= TCL_ENCODING_END;
+ }
+ if (statePtr->flags & CHANNEL_EOF) {
+ skip = 0;
+ eol = dstEnd;
+ if ((dst == dstEnd) && (byteLen == oldLength)) {
+ /*
+ * If we didn't append any bytes before encountering EOF,
+ * caller needs to see -1.
+ */
+
+ byteArray = Tcl_SetByteArrayLength(objPtr, oldLength);
+ CommonGetsCleanup(chanPtr);
+ copiedTotal = -1;
+ goto done;
+ }
+ goto gotEOL;
+ }
+
+ /*
+ * Copy bytes from the channel buffer to the ByteArray.
+ * This may realloc space, so keep track of result.
+ */
+
+ rawLen = dstEnd - dst;
+ byteArray = Tcl_SetByteArrayLength(objPtr, byteLen + rawLen);
+ memcpy(byteArray + byteLen, dst, (size_t) rawLen);
+ byteLen += rawLen;
+ }
+
+ /*
+ * Found EOL or EOF, but the output buffer may now contain too many bytes.
+ * We need to know how many bytes correspond to the number we want, so we
+ * can remove the correct number of bytes from the channel buffer.
+ */
+
+ gotEOL:
+ if (bufPtr == NULL) {
+ Tcl_Panic("TclGetsObjBinary: gotEOL reached with bufPtr==NULL");
+ }
+
+ rawLen = eol - dst;
+ byteArray = Tcl_SetByteArrayLength(objPtr, byteLen + rawLen);
+ memcpy(byteArray + byteLen, dst, (size_t) rawLen);
+ byteLen += rawLen;
+ bufPtr->nextRemoved += rawLen + skip;
+
+ /*
+ * Convert the buffer if there was an encoding.
+ * XXX - unimplemented.
+ */
+
+ if (statePtr->encoding != NULL) {
+ }
+
+ /*
+ * Recycle all the emptied buffers.
+ */
+
+ CommonGetsCleanup(chanPtr);
+ statePtr->flags &= ~CHANNEL_BLOCKED;
+ copiedTotal = byteLen;
+ goto done;
+
+ /*
+ * Couldn't get a complete line. This only happens if we get a error
+ * reading from the channel or we are non-blocking and there wasn't an EOL
+ * or EOF in the data available.
+ */
+
+ restore:
+ bufPtr = statePtr->inQueueHead;
+ if (bufPtr == NULL) {
+ Tcl_Panic("TclGetsObjBinary: restore reached with bufPtr==NULL");
+ }
+ bufPtr->nextRemoved = oldRemoved;
+
+ for (bufPtr = bufPtr->nextPtr; bufPtr != NULL; bufPtr = bufPtr->nextPtr) {
+ bufPtr->nextRemoved = BUFFER_PADDING;
+ }
+ CommonGetsCleanup(chanPtr);
+
+ statePtr->inputEncodingFlags = oldFlags;
+ byteArray = Tcl_SetByteArrayLength(objPtr, oldLength);
+
+ /*
+ * We didn't get a complete line so we need to indicate to UpdateInterest
+ * that the gets blocked. It will wait for more data instead of firing a
+ * timer, avoiding a busy wait. This is where we are assuming that the
+ * next operation is a gets. No more file events will be delivered on this
+ * channel until new data arrives or some operation is performed on the
+ * channel (e.g. gets, read, fconfigure) that changes the blocking state.
+ * Note that this means a file event will not be delivered even though a
+ * read would be able to consume the buffered data.
+ */
+
+ statePtr->flags |= CHANNEL_NEED_MORE_DATA;
+ copiedTotal = -1;
+
+ /*
+ * Update the notifier state so we don't block while there is still data
+ * in the buffers.
+ */
+
+ done:
+ UpdateInterest(chanPtr);
+ return copiedTotal;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
* FreeBinaryEncoding --
* Frees any "iso8859-1" Tcl_Encoding created by [gets] on a binary
* channel in a thread as part of that thread's finalization.
@@ -4224,7 +4480,7 @@ FilterInputBytes(
ChannelState *statePtr = chanPtr->state;
/* State info for channel */
ChannelBuffer *bufPtr;
- char *raw, *rawStart, *rawEnd, *dst;
+ char *raw, *rawStart, *dst;
int offset, toRead, dstNeeded, spaceLeft, result, rawLen, length;
Tcl_Obj *objPtr;
#define ENCODING_LINESIZE 20 /* Lower bound on how many bytes to convert at
@@ -4283,7 +4539,6 @@ FilterInputBytes(
rawStart = RemovePoint(bufPtr);
raw = rawStart;
- rawEnd = InsertPoint(bufPtr);
rawLen = BytesLeft(bufPtr);
dst = *gsPtr->dstPtr;
@@ -4839,14 +5094,14 @@ DoReadChars(
* been pure Unicode).
*/
- Tcl_GetString(objPtr);
+ TclGetString(objPtr);
}
offset = 0;
} else {
if (encoding == NULL) {
Tcl_GetByteArrayFromObj(objPtr, &offset);
} else {
- Tcl_GetStringFromObj(objPtr, &offset);
+ TclGetStringFromObj(objPtr, &offset);
}
}
@@ -8291,7 +8546,7 @@ CopyData(
buffer = csPtr->buffer;
sizeb = size;
} else {
- buffer = Tcl_GetStringFromObj(bufObj, &sizeb);
+ buffer = TclGetStringFromObj(bufObj, &sizeb);
}
if (outBinary || sameEncoding) {
@@ -10020,7 +10275,7 @@ FixLevelCode(
* !"error", !integer, integer != 1 (numeric code for error)
*/
- res = Tcl_GetIntFromObj(NULL, lv[i+1], &val);
+ res = TclGetIntFromObj(NULL, lv[i+1], &val);
if (((res == TCL_OK) && (val != 1)) || ((res != TCL_OK) &&
(0 != strcmp(TclGetString(lv[i+1]), "error")))) {
newcode = 1;
@@ -10030,7 +10285,7 @@ FixLevelCode(
* !integer, integer != 0
*/
- res = Tcl_GetIntFromObj(NULL, lv [i+1], &val);
+ res = TclGetIntFromObj(NULL, lv [i+1], &val);
if ((res != TCL_OK) || (val != 0)) {
newlevel = 0;
}