summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2014-11-15 21:08:57 (GMT)
committerdgp <dgp@users.sourceforge.net>2014-11-15 21:08:57 (GMT)
commit55688af5e97888536930067e5e23a686661a0b1c (patch)
tree7dfba7ed7d0c7b5acced0020322271a79aa378d2
parentc3c06423586daccaeba84d4aacf23f375121434c (diff)
downloadtcl-55688af5e97888536930067e5e23a686661a0b1c.zip
tcl-55688af5e97888536930067e5e23a686661a0b1c.tar.gz
tcl-55688af5e97888536930067e5e23a686661a0b1c.tar.bz2
Tcl_ExternalToUtf appends a terminating NUL to its encoded results.
Perhaps this is a welcome convenience for some callers, but not for Tcl's I/O system, which has no need for that. Added a new flag value TCL_ENCODING_NO_TERMINATE that callers can use to suppress this behavior. This means buffers don't require so much padding, and a tiny bit of processing is saved. Update I/O callers to use the feature.
-rw-r--r--generic/tcl.h17
-rw-r--r--generic/tclEncoding.c22
-rw-r--r--generic/tclIO.c44
3 files changed, 55 insertions, 28 deletions
diff --git a/generic/tcl.h b/generic/tcl.h
index fc477f2..95f2b3f 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2144,11 +2144,28 @@ typedef struct Tcl_EncodingType {
* substituting one or more "close" characters in
* the destination buffer and then continue to
* convert the source.
+ * TCL_ENCODING_NO_TERMINATE - If set, Tcl_ExternalToUtf will not append a
+ * terminating NUL byte. Knowing that it will
+ * not need space to do so, it will fill all
+ * dstLen bytes with encoded UTF-8 content, as
+ * other circumstances permit. If clear, the
+ * default behavior is to reserve a byte in
+ * the dst space for NUL termination, and to
+ * append the NUL byte.
+ * TCL_ENCODING_CHAR_LIMIT - If set and dstCharsPtr is not NULL, then
+ * Tcl_ExternalToUtf takes the initial value
+ * of *dstCharsPtr as a limit of the
+ * maximum number of chars to produce in the
+ * encoded UTF-8 content. Otherwise, the
+ * number of chars produced is controlled only
+ * by other limiting factors.
*/
#define TCL_ENCODING_START 0x01
#define TCL_ENCODING_END 0x02
#define TCL_ENCODING_STOPONERROR 0x04
+#define TCL_ENCODING_NO_TERMINATE 0x08
+#define TCL_ENCODING_CHAR_LIMIT 0x10
/*
* The following definitions are the error codes returned by the conversion
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index d246cb2..0446816 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -1204,6 +1204,7 @@ Tcl_ExternalToUtf(
{
const Encoding *encodingPtr;
int result, srcRead, dstWrote, dstChars;
+ int noTerminate = flags & TCL_ENCODING_NO_TERMINATE;
Tcl_EncodingState state;
if (encoding == NULL) {
@@ -1230,17 +1231,24 @@ Tcl_ExternalToUtf(
dstCharsPtr = &dstChars;
}
- /*
- * If there are any null characters in the middle of the buffer, they will
- * converted to the UTF-8 null character (\xC080). To get the actual \0 at
- * the end of the destination buffer, we need to append it manually.
- */
+ if (!noTerminate) {
+ /*
+ * If there are any null characters in the middle of the buffer,
+ * they will be converted to the UTF-8 null character (\xC0\x80). To get
+ * the actual \0 at the end of the destination buffer, we need to
+ * append it manually. First make room for it...
+ */
- dstLen--;
+ dstLen--;
+ }
result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
dstCharsPtr);
- dst[*dstWrotePtr] = '\0';
+ if (!noTerminate) {
+ /* ...and then append it */
+
+ dst[*dstWrotePtr] = '\0';
+ }
return result;
}
diff --git a/generic/tclIO.c b/generic/tclIO.c
index 2025742..b759c0e 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -4578,14 +4578,14 @@ Tcl_GetsObj(
* Skip the raw bytes that make up the '\n'.
*/
- char tmp[1 + TCL_UTF_MAX];
+ char tmp[TCL_UTF_MAX];
int rawRead;
bufPtr = gs.bufPtr;
Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr),
- gs.rawRead, statePtr->inputEncodingFlags,
- &gs.state, tmp, 1 + TCL_UTF_MAX, &rawRead, NULL,
- NULL);
+ gs.rawRead, statePtr->inputEncodingFlags
+ | TCL_ENCODING_NO_TERMINATE, &gs.state, tmp,
+ TCL_UTF_MAX, &rawRead, NULL, NULL);
bufPtr->nextRemoved += rawRead;
gs.rawRead -= rawRead;
gs.bytesWrote--;
@@ -4686,8 +4686,9 @@ Tcl_GetsObj(
}
statePtr->inputEncodingState = gs.state;
Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr), gs.rawRead,
- statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst,
- eol - dst + skip + TCL_UTF_MAX, &gs.rawRead, NULL,
+ statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
+ &statePtr->inputEncodingState, dst,
+ eol - dst + skip + TCL_UTF_MAX - 1, &gs.rawRead, NULL,
&gs.charsWrote);
bufPtr->nextRemoved += gs.rawRead;
@@ -5219,9 +5220,9 @@ FilterInputBytes(
}
gsPtr->state = statePtr->inputEncodingState;
result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen,
- statePtr->inputEncodingFlags, &statePtr->inputEncodingState,
- dst, spaceLeft+1, &gsPtr->rawRead, &gsPtr->bytesWrote,
- &gsPtr->charsWrote);
+ statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
+ &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead,
+ &gsPtr->bytesWrote, &gsPtr->charsWrote);
/*
* Make sure that if we go through 'gets', that we reset the
@@ -5975,7 +5976,7 @@ ReadChars(
* a consistent set of results. This takes the shape of a loop.
*/
- dstLimit = dstNeeded + 1;
+ dstLimit = dstNeeded;
while (1) {
int dstDecoded, dstRead, dstWrote, srcRead, numChars;
@@ -5985,9 +5986,10 @@ ReadChars(
*/
int code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
- statePtr->inputEncodingFlags & (bufPtr->nextPtr
- ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState,
- dst, dstLimit, &srcRead, &dstDecoded, &numChars);
+ (statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE)
+ & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END),
+ &statePtr->inputEncodingState, dst, dstLimit, &srcRead,
+ &dstDecoded, &numChars);
/*
* Perform the translation transformation in place. Read no more
@@ -6050,7 +6052,7 @@ ReadChars(
* time.
*/
- dstLimit = dstRead + TCL_UTF_MAX;
+ dstLimit = dstRead - 1 + TCL_UTF_MAX;
statePtr->flags = savedFlags;
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;
@@ -6076,7 +6078,7 @@ ReadChars(
* up back here in this call.
*/
- dstLimit = dstRead + TCL_UTF_MAX;
+ dstLimit = dstRead - 1 + TCL_UTF_MAX;
statePtr->flags = savedFlags;
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;
@@ -6093,7 +6095,7 @@ ReadChars(
*/
if (code != TCL_OK) {
- char buffer[TCL_UTF_MAX + 2];
+ char buffer[TCL_UTF_MAX + 1];
int read, decoded, count;
/*
@@ -6105,9 +6107,10 @@ ReadChars(
statePtr->inputEncodingState = savedState;
Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
- statePtr->inputEncodingFlags & (bufPtr->nextPtr
- ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState,
- buffer, TCL_UTF_MAX + 2, &read, &decoded, &count);
+ (statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE)
+ & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END),
+ &statePtr->inputEncodingState, buffer, TCL_UTF_MAX + 1,
+ &read, &decoded, &count);
if (count == 2) {
if (buffer[1] == '\n') {
@@ -6119,7 +6122,6 @@ ReadChars(
bufPtr->nextRemoved += srcRead;
}
- dst[1] = '\0';
statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;
Tcl_SetObjLength(objPtr, numBytes + 1);
@@ -6166,7 +6168,7 @@ ReadChars(
* Tcl_ExternalToUtf() call!
*/
- dstLimit = Tcl_UtfAtIndex(dst, charsToRead) + TCL_UTF_MAX - dst;
+ dstLimit = Tcl_UtfAtIndex(dst, charsToRead) - 1 + TCL_UTF_MAX - dst;
statePtr->flags = savedFlags;
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;