summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
Diffstat (limited to 'generic')
-rw-r--r--generic/tcl.h17
-rw-r--r--generic/tclEncoding.c46
-rw-r--r--generic/tclIO.c51
3 files changed, 78 insertions, 36 deletions
diff --git a/generic/tcl.h b/generic/tcl.h
index fc477f2..95f2b3f 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2144,11 +2144,28 @@ typedef struct Tcl_EncodingType {
* substituting one or more "close" characters in
* the destination buffer and then continue to
* convert the source.
+ * TCL_ENCODING_NO_TERMINATE - If set, Tcl_ExternalToUtf will not append a
+ * terminating NUL byte. Knowing that it will
+ * not need space to do so, it will fill all
+ * dstLen bytes with encoded UTF-8 content, as
+ * other circumstances permit. If clear, the
+ * default behavior is to reserve a byte in
+ * the dst space for NUL termination, and to
+ * append the NUL byte.
+ * TCL_ENCODING_CHAR_LIMIT - If set and dstCharsPtr is not NULL, then
+ * Tcl_ExternalToUtf takes the initial value
+ * of *dstCharsPtr as a limit on the
+ * maximum number of chars to produce in the
+ * encoded UTF-8 content. Otherwise, the
+ * number of chars produced is controlled only
+ * by other limiting factors.
*/
#define TCL_ENCODING_START 0x01
#define TCL_ENCODING_END 0x02
#define TCL_ENCODING_STOPONERROR 0x04
+#define TCL_ENCODING_NO_TERMINATE 0x08
+#define TCL_ENCODING_CHAR_LIMIT 0x10
/*
* The following definitions are the error codes returned by the conversion
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 95c59c0..2a766d1 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -1206,7 +1206,10 @@ Tcl_ExternalToUtf(
* output buffer. */
{
const Encoding *encodingPtr;
- int result, srcRead, dstWrote, dstChars;
+ int result, srcRead, dstWrote, dstChars = 0;
+ int noTerminate = flags & TCL_ENCODING_NO_TERMINATE;
+ int charLimited = (flags & TCL_ENCODING_CHAR_LIMIT) && dstCharsPtr;
+ int maxChars = INT_MAX;
Tcl_EncodingState state;
if (encoding == NULL) {
@@ -1231,19 +1234,40 @@ Tcl_ExternalToUtf(
}
if (dstCharsPtr == NULL) {
dstCharsPtr = &dstChars;
+ flags &= ~TCL_ENCODING_CHAR_LIMIT;
+ } else if (charLimited) {
+ maxChars = *dstCharsPtr;
}
- /*
- * If there are any null characters in the middle of the buffer, they will
- * converted to the UTF-8 null character (\xC080). To get the actual \0 at
- * the end of the destination buffer, we need to append it manually.
- */
+ if (!noTerminate) {
+ /*
+ * If there are any null characters in the middle of the buffer,
+ * they will be converted to the UTF-8 null character (\xC080). To get
+ * the actual \0 at the end of the destination buffer, we need to
+ * append it manually. First make room for it...
+ */
- dstLen--;
- result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
- flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
- dstCharsPtr);
- dst[*dstWrotePtr] = '\0';
+ dstLen--;
+ }
+ do {
+ int savedFlags = flags;
+ Tcl_EncodingState savedState = *statePtr;
+
+ result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
+ flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
+ dstCharsPtr);
+ if (*dstCharsPtr <= maxChars) {
+ break;
+ }
+ dstLen = Tcl_UtfAtIndex(dst, maxChars) - 1 - dst + TCL_UTF_MAX;
+ flags = savedFlags;
+ *statePtr = savedState;
+ } while (1);
+ if (!noTerminate) {
+ /* ...and then append it */
+
+ dst[*dstWrotePtr] = '\0';
+ }
return result;
}
diff --git a/generic/tclIO.c b/generic/tclIO.c
index 8a35aee..79aa667 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -4578,14 +4578,14 @@ Tcl_GetsObj(
* Skip the raw bytes that make up the '\n'.
*/
- char tmp[1 + TCL_UTF_MAX];
+ char tmp[TCL_UTF_MAX];
int rawRead;
bufPtr = gs.bufPtr;
Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr),
- gs.rawRead, statePtr->inputEncodingFlags,
- &gs.state, tmp, 1 + TCL_UTF_MAX, &rawRead, NULL,
- NULL);
+ gs.rawRead, statePtr->inputEncodingFlags
+ | TCL_ENCODING_NO_TERMINATE, &gs.state, tmp,
+ TCL_UTF_MAX, &rawRead, NULL, NULL);
bufPtr->nextRemoved += rawRead;
gs.rawRead -= rawRead;
gs.bytesWrote--;
@@ -4686,8 +4686,9 @@ Tcl_GetsObj(
}
statePtr->inputEncodingState = gs.state;
Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr), gs.rawRead,
- statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst,
- eol - dst + skip + TCL_UTF_MAX, &gs.rawRead, NULL,
+ statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
+ &statePtr->inputEncodingState, dst,
+ eol - dst + skip + TCL_UTF_MAX - 1, &gs.rawRead, NULL,
&gs.charsWrote);
bufPtr->nextRemoved += gs.rawRead;
@@ -5219,9 +5220,9 @@ FilterInputBytes(
}
gsPtr->state = statePtr->inputEncodingState;
result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen,
- statePtr->inputEncodingFlags, &statePtr->inputEncodingState,
- dst, spaceLeft+1, &gsPtr->rawRead, &gsPtr->bytesWrote,
- &gsPtr->charsWrote);
+ statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
+ &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead,
+ &gsPtr->bytesWrote, &gsPtr->charsWrote);
/*
* Make sure that if we go through 'gets', that we reset the
@@ -5928,7 +5929,7 @@ ReadChars(
int savedIEFlags = statePtr->inputEncodingFlags;
int savedFlags = statePtr->flags;
char *dst, *src = RemovePoint(bufPtr);
- int dstLimit, numBytes, srcLen = BytesLeft(bufPtr);
+ int numBytes, srcLen = BytesLeft(bufPtr);
/*
* One src byte can yield at most one character. So when the
@@ -5947,14 +5948,14 @@ ReadChars(
*/
int factor = *factorPtr;
- int dstNeeded = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR;
+ int dstLimit = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR;
(void) TclGetStringFromObj(objPtr, &numBytes);
- Tcl_AppendToObj(objPtr, NULL, dstNeeded);
+ Tcl_AppendToObj(objPtr, NULL, dstLimit);
if (toRead == srcLen) {
unsigned int size;
dst = TclGetStringStorage(objPtr, &size) + numBytes;
- dstNeeded = size - numBytes;
+ dstLimit = size - numBytes;
} else {
dst = TclGetString(objPtr) + numBytes;
}
@@ -5975,7 +5976,6 @@ ReadChars(
* a consistent set of results. This takes the shape of a loop.
*/
- dstLimit = dstNeeded + 1;
while (1) {
int dstDecoded, dstRead, dstWrote, srcRead, numChars;
@@ -5985,9 +5985,10 @@ ReadChars(
*/
int code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
- statePtr->inputEncodingFlags & (bufPtr->nextPtr
- ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState,
- dst, dstLimit, &srcRead, &dstDecoded, &numChars);
+ (statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE)
+ & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END),
+ &statePtr->inputEncodingState, dst, dstLimit, &srcRead,
+ &dstDecoded, &numChars);
/*
* Perform the translation transformation in place. Read no more
@@ -6050,7 +6051,7 @@ ReadChars(
* time.
*/
- dstLimit = dstRead + TCL_UTF_MAX;
+ dstLimit = dstRead - 1 + TCL_UTF_MAX;
statePtr->flags = savedFlags;
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;
@@ -6076,7 +6077,7 @@ ReadChars(
* up back here in this call.
*/
- dstLimit = dstRead + TCL_UTF_MAX;
+ dstLimit = dstRead - 1 + TCL_UTF_MAX;
statePtr->flags = savedFlags;
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;
@@ -6093,7 +6094,7 @@ ReadChars(
*/
if (code != TCL_OK) {
- char buffer[TCL_UTF_MAX + 2];
+ char buffer[TCL_UTF_MAX + 1];
int read, decoded, count;
/*
@@ -6105,9 +6106,10 @@ ReadChars(
statePtr->inputEncodingState = savedState;
Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
- statePtr->inputEncodingFlags & (bufPtr->nextPtr
- ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState,
- buffer, TCL_UTF_MAX + 2, &read, &decoded, &count);
+ (statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE)
+ & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END),
+ &statePtr->inputEncodingState, buffer, TCL_UTF_MAX + 1,
+ &read, &decoded, &count);
if (count == 2) {
if (buffer[1] == '\n') {
@@ -6119,7 +6121,6 @@ ReadChars(
bufPtr->nextRemoved += srcRead;
}
- dst[1] = '\0';
statePtr->inputEncodingFlags &= ~TCL_ENCODING_START;
Tcl_SetObjLength(objPtr, numBytes + 1);
@@ -6166,7 +6167,7 @@ ReadChars(
* Tcl_ExternalToUtf() call!
*/
- dstLimit = Tcl_UtfAtIndex(dst, charsToRead) + TCL_UTF_MAX - dst;
+ dstLimit = Tcl_UtfAtIndex(dst, charsToRead) - 1 + TCL_UTF_MAX - dst;
statePtr->flags = savedFlags;
statePtr->inputEncodingFlags = savedIEFlags;
statePtr->inputEncodingState = savedState;