summaryrefslogtreecommitdiffstats
path: root/generic/tclIO.c
diff options
context:
space:
mode:
author dgp@users.sourceforge.net <dgp> 2014-12-23 18:39:35 (GMT)
committer dgp@users.sourceforge.net <dgp> 2014-12-23 18:39:35 (GMT)
commit 5da1cb6f0287acbf6700dcafd43a54fdb5fccb45 (patch)
tree 94a25cd0a1f31aa5532c59bd74202b7469ab8311 /generic/tclIO.c
parent dd69f66f843277cf2c1c7b45b80c9741e3eb0045 (diff)
parent af48d8bf01566f64a168a6c4ee5bdb5ebfd359fd (diff)
download tcl-5da1cb6f0287acbf6700dcafd43a54fdb5fccb45.zip
tcl-5da1cb6f0287acbf6700dcafd43a54fdb5fccb45.tar.gz
tcl-5da1cb6f0287acbf6700dcafd43a54fdb5fccb45.tar.bz2
Add two new (undocumented) flags to the Tcl_ExternalToUtf() interface.
TCL_ENCODING_NO_TERMINATE suppresses the default behavior of appending a terminating NUL byte to the produced Utf output. This permits use of all of the dstLen bytes provided, and simplifies the buffer size calculations demanded of callers. Perhaps some callers need or appreciate the default behavior, but for Tcl's own main use of encodings - conversions within I/O - it just gets in the way. TCL_ENCODING_CHAR_LIMIT lets the caller set a limit on the number of chars to be output, enforced by the encoding routines themselves. Without this, callers have to check after the fact for going beyond the limit and make multiple encoding calls in a trial-and-error approach. Full compatibility is preserved. No defaults are changed, and the flags have their effect even if an encoding driver has not been written to support them (but greater efficiency is enjoyed if it has!). All of Tcl's own encoding drivers are updated to support these flags. Other encoding drivers may exist somewhere, but I cannot point to any. A TIP to document this and make it officially supported may come in time.
Diffstat (limited to 'generic/tclIO.c')
-rw-r--r--generic/tclIO.c62
1 files changed, 35 insertions, 27 deletions
diff --git a/generic/tclIO.c b/generic/tclIO.c
index 8a35aee..9bbf2a6 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -4578,14 +4578,14 @@ Tcl_GetsObj(
* Skip the raw bytes that make up the '\n'.
*/
- char tmp[1 + TCL_UTF_MAX];
+ char tmp[TCL_UTF_MAX];
int rawRead;
bufPtr = gs.bufPtr;
Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr),
- gs.rawRead, statePtr->inputEncodingFlags,
- &gs.state, tmp, 1 + TCL_UTF_MAX, &rawRead, NULL,
- NULL);
+ gs.rawRead, statePtr->inputEncodingFlags
+ | TCL_ENCODING_NO_TERMINATE, &gs.state, tmp,
+ TCL_UTF_MAX, &rawRead, NULL, NULL);
bufPtr->nextRemoved += rawRead;
gs.rawRead -= rawRead;
gs.bytesWrote--;
@@ -4686,8 +4686,9 @@ Tcl_GetsObj(
}
statePtr->inputEncodingState = gs.state;
Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr), gs.rawRead,
- statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst,
- eol - dst + skip + TCL_UTF_MAX, &gs.rawRead, NULL,
+ statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
+ &statePtr->inputEncodingState, dst,
+ eol - dst + skip + TCL_UTF_MAX - 1, &gs.rawRead, NULL,
&gs.charsWrote);
bufPtr->nextRemoved += gs.rawRead;
@@ -5219,9 +5220,9 @@ FilterInputBytes(
}
gsPtr->state = statePtr->inputEncodingState;
result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen,
- statePtr->inputEncodingFlags, &statePtr->inputEncodingState,
- dst, spaceLeft+1, &gsPtr->rawRead, &gsPtr->bytesWrote,
- &gsPtr->charsWrote);
+ statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
+ &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead,
+ &gsPtr->bytesWrote, &gsPtr->charsWrote);
/*
* Make sure that if we go through 'gets', that we reset the
@@ -5928,7 +5929,7 @@ ReadChars(
int savedIEFlags = statePtr->inputEncodingFlags;
int savedFlags = statePtr->flags;
char *dst, *src = RemovePoint(bufPtr);
- int dstLimit, numBytes, srcLen = BytesLeft(bufPtr);
+ int numBytes, srcLen = BytesLeft(bufPtr);
/*
* One src byte can yield at most one character. So when the
@@ -5947,14 +5948,14 @@ ReadChars(
*/
int factor = *factorPtr;
- int dstNeeded = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR;
+ int dstLimit = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR;
(void) TclGetStringFromObj(objPtr, &numBytes);
- Tcl_AppendToObj(objPtr, NULL, dstNeeded);
+ Tcl_AppendToObj(objPtr, NULL, dstLimit);
if (toRead == srcLen) {
unsigned int size;
dst = TclGetStringStorage(objPtr, &size) + numBytes;
- dstNeeded = size - numBytes;
+ dstLimit = size - numBytes;
} else {
dst = TclGetString(objPtr) + numBytes;
}
@@ -5975,19 +5976,24 @@ ReadChars(
* a consistent set of results. This takes the shape of a loop.
*/
- dstLimit = dstNeeded + 1;
while (1) {
- int dstDecoded, dstRead, dstWrote, srcRead, numChars;
+ int dstDecoded, dstRead, dstWrote, srcRead, numChars, code;
+ int flags = statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE;
+
+ if (charsToRead > 0) {
+ flags |= TCL_ENCODING_CHAR_LIMIT;
+ numChars = charsToRead;
+ }
/*
* Perform the encoding transformation. Read no more than
* srcLen bytes, write no more than dstLimit bytes.
*/
- int code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
- statePtr->inputEncodingFlags & (bufPtr->nextPtr
- ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState,
- dst, dstLimit, &srcRead, &dstDecoded, &numChars);
+ code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
+ flags & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END),
+ &statePtr->inputEncodingState, dst, dstLimit, &srcRead,
+ &dstDecoded, &numChars);
/*
* Perform the translation transformation in place. Read no more
@@ -6050,7 +6056,7 @@ ReadChars(
* time.
*/
- dstLimit = dstRead + TCL_UTF_MAX;
+ dstLimit = dstRead - 1 + TCL_UTF_MAX;
statePtr->flags = savedFlags;
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;
@@ -6076,7 +6082,7 @@ ReadChars(
* up back here in this call.
*/
- dstLimit = dstRead + TCL_UTF_MAX;
+ dstLimit = dstRead - 1 + TCL_UTF_MAX;
statePtr->flags = savedFlags;
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;
@@ -6093,7 +6099,7 @@ ReadChars(
*/
if (code != TCL_OK) {
- char buffer[TCL_UTF_MAX + 2];
+ char buffer[TCL_UTF_MAX + 1];
int read, decoded, count;
/*
@@ -6105,9 +6111,10 @@ ReadChars(
statePtr->inputEncodingState = savedState;
Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
- statePtr->inputEncodingFlags & (bufPtr->nextPtr
- ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState,
- buffer, TCL_UTF_MAX + 2, &read, &decoded, &count);
+ (statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE)
+ & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END),
+ &statePtr->inputEncodingState, buffer, TCL_UTF_MAX + 1,
+ &read, &decoded, &count);
if (count == 2) {
if (buffer[1] == '\n') {
@@ -6119,7 +6126,6 @@ ReadChars(
bufPtr->nextRemoved += srcRead;
}
- dst[1] = '\0';
statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;
Tcl_SetObjLength(objPtr, numBytes + 1);
@@ -6160,13 +6166,15 @@ ReadChars(
if (charsToRead > 0 && numChars > charsToRead) {
/*
+ * TODO: This cannot happen anymore.
+ *
* We read more chars than allowed. Reset limits to
* prevent that and try again. Don't forget the extra
* padding of TCL_UTF_MAX bytes demanded by the
* Tcl_ExternalToUtf() call!
*/
- dstLimit = Tcl_UtfAtIndex(dst, charsToRead) + TCL_UTF_MAX - dst;
+ dstLimit = Tcl_UtfAtIndex(dst, charsToRead) - 1 + TCL_UTF_MAX - dst;
statePtr->flags = savedFlags;
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;