diff options
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tcl.h | 17 | ||||
-rw-r--r-- | generic/tclEncoding.c | 46 | ||||
-rw-r--r-- | generic/tclIO.c | 51 |
3 files changed, 78 insertions, 36 deletions
diff --git a/generic/tcl.h b/generic/tcl.h index fc477f2..95f2b3f 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2144,11 +2144,28 @@ typedef struct Tcl_EncodingType { * substituting one or more "close" characters in * the destination buffer and then continue to * convert the source. + * TCL_ENCODING_NO_TERMINATE - If set, Tcl_ExternalToUtf will not append a + * terminating NUL byte. Knowing that it will + * not need space to do so, it will fill all + * dstLen bytes with encoded UTF-8 content, as + * other circumstances permit. If clear, the + * default behavior is to reserve a byte in + * the dst space for NUL termination, and to + * append the NUL byte. + * TCL_ENCODING_CHAR_LIMIT - If set and dstCharsPtr is not NULL, then + * Tcl_ExternalToUtf takes the initial value + * of *dstCharsPtr as a limit of the + * maximum number of chars to produce in the + * encoded UTF-8 content. Otherwise, the + * number of chars produced is controlled only + * by other limiting factors. */ #define TCL_ENCODING_START 0x01 #define TCL_ENCODING_END 0x02 #define TCL_ENCODING_STOPONERROR 0x04 +#define TCL_ENCODING_NO_TERMINATE 0x08 +#define TCL_ENCODING_CHAR_LIMIT 0x10 /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 95c59c0..2a766d1 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1206,7 +1206,10 @@ Tcl_ExternalToUtf( * output buffer. 
 */ { const Encoding *encodingPtr; - int result, srcRead, dstWrote, dstChars; + int result, srcRead, dstWrote, dstChars = 0; + int noTerminate = flags & TCL_ENCODING_NO_TERMINATE; + int charLimited = (flags & TCL_ENCODING_CHAR_LIMIT) && dstCharsPtr; + int maxChars = INT_MAX; Tcl_EncodingState state; if (encoding == NULL) { @@ -1231,19 +1234,40 @@ Tcl_ExternalToUtf( } if (dstCharsPtr == NULL) { dstCharsPtr = &dstChars; + flags &= ~TCL_ENCODING_CHAR_LIMIT; + } else if (charLimited) { + maxChars = *dstCharsPtr; } - /* - * If there are any null characters in the middle of the buffer, they will - * converted to the UTF-8 null character (\xC080). To get the actual \0 at - * the end of the destination buffer, we need to append it manually. - */ + if (!noTerminate) { + /* + * If there are any null characters in the middle of the buffer, + * they will be converted to the UTF-8 null character (\xC080). To get + * the actual \0 at the end of the destination buffer, we need to + * append it manually. First make room for it... + */ - dstLen--; - result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, - flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, - dstCharsPtr); - dst[*dstWrotePtr] = '\0'; + dstLen--; + } + do { + int savedFlags = flags; + Tcl_EncodingState savedState = *statePtr; + + result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, + flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, + dstCharsPtr); + if (*dstCharsPtr <= maxChars) { + break; + } + dstLen = Tcl_UtfAtIndex(dst, maxChars) - 1 - dst + TCL_UTF_MAX; + flags = savedFlags; + *statePtr = savedState; + } while (1); + if (!noTerminate) { + /* ...and then append it */ + + dst[*dstWrotePtr] = '\0'; + } return result; } diff --git a/generic/tclIO.c b/generic/tclIO.c index 8a35aee..79aa667 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4578,14 +4578,14 @@ Tcl_GetsObj( * Skip the raw bytes that make up the '\n'. 
*/ - char tmp[1 + TCL_UTF_MAX]; + char tmp[TCL_UTF_MAX]; int rawRead; bufPtr = gs.bufPtr; Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr), - gs.rawRead, statePtr->inputEncodingFlags, - &gs.state, tmp, 1 + TCL_UTF_MAX, &rawRead, NULL, - NULL); + gs.rawRead, statePtr->inputEncodingFlags + | TCL_ENCODING_NO_TERMINATE, &gs.state, tmp, + TCL_UTF_MAX, &rawRead, NULL, NULL); bufPtr->nextRemoved += rawRead; gs.rawRead -= rawRead; gs.bytesWrote--; @@ -4686,8 +4686,9 @@ Tcl_GetsObj( } statePtr->inputEncodingState = gs.state; Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr), gs.rawRead, - statePtr->inputEncodingFlags, &statePtr->inputEncodingState, dst, - eol - dst + skip + TCL_UTF_MAX, &gs.rawRead, NULL, + statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE, + &statePtr->inputEncodingState, dst, + eol - dst + skip + TCL_UTF_MAX - 1, &gs.rawRead, NULL, &gs.charsWrote); bufPtr->nextRemoved += gs.rawRead; @@ -5219,9 +5220,9 @@ FilterInputBytes( } gsPtr->state = statePtr->inputEncodingState; result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen, - statePtr->inputEncodingFlags, &statePtr->inputEncodingState, - dst, spaceLeft+1, &gsPtr->rawRead, &gsPtr->bytesWrote, - &gsPtr->charsWrote); + statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE, + &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead, + &gsPtr->bytesWrote, &gsPtr->charsWrote); /* * Make sure that if we go through 'gets', that we reset the @@ -5928,7 +5929,7 @@ ReadChars( int savedIEFlags = statePtr->inputEncodingFlags; int savedFlags = statePtr->flags; char *dst, *src = RemovePoint(bufPtr); - int dstLimit, numBytes, srcLen = BytesLeft(bufPtr); + int numBytes, srcLen = BytesLeft(bufPtr); /* * One src byte can yield at most one character. 
So when the @@ -5947,14 +5948,14 @@ ReadChars( */ int factor = *factorPtr; - int dstNeeded = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR; + int dstLimit = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR; (void) TclGetStringFromObj(objPtr, &numBytes); - Tcl_AppendToObj(objPtr, NULL, dstNeeded); + Tcl_AppendToObj(objPtr, NULL, dstLimit); if (toRead == srcLen) { unsigned int size; dst = TclGetStringStorage(objPtr, &size) + numBytes; - dstNeeded = size - numBytes; + dstLimit = size - numBytes; } else { dst = TclGetString(objPtr) + numBytes; } @@ -5975,7 +5976,6 @@ ReadChars( * a consistent set of results. This takes the shape of a loop. */ - dstLimit = dstNeeded + 1; while (1) { int dstDecoded, dstRead, dstWrote, srcRead, numChars; @@ -5985,9 +5985,10 @@ ReadChars( */ int code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen, - statePtr->inputEncodingFlags & (bufPtr->nextPtr - ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState, - dst, dstLimit, &srcRead, &dstDecoded, &numChars); + (statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE) + & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END), + &statePtr->inputEncodingState, dst, dstLimit, &srcRead, + &dstDecoded, &numChars); /* * Perform the translation transformation in place. Read no more @@ -6050,7 +6051,7 @@ ReadChars( * time. */ - dstLimit = dstRead + TCL_UTF_MAX; + dstLimit = dstRead - 1 + TCL_UTF_MAX; statePtr->flags = savedFlags; statePtr->inputEncodingFlags = savedIEFlags; statePtr->inputEncodingState = savedState; @@ -6076,7 +6077,7 @@ ReadChars( * up back here in this call. 
*/ - dstLimit = dstRead + TCL_UTF_MAX; + dstLimit = dstRead - 1 + TCL_UTF_MAX; statePtr->flags = savedFlags; statePtr->inputEncodingFlags = savedIEFlags; statePtr->inputEncodingState = savedState; @@ -6093,7 +6094,7 @@ ReadChars( */ if (code != TCL_OK) { - char buffer[TCL_UTF_MAX + 2]; + char buffer[TCL_UTF_MAX + 1]; int read, decoded, count; /* @@ -6105,9 +6106,10 @@ ReadChars( statePtr->inputEncodingState = savedState; Tcl_ExternalToUtf(NULL, encoding, src, srcLen, - statePtr->inputEncodingFlags & (bufPtr->nextPtr - ? ~0 : ~TCL_ENCODING_END), &statePtr->inputEncodingState, - buffer, TCL_UTF_MAX + 2, &read, &decoded, &count); + (statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE) + & (bufPtr->nextPtr ? ~0 : ~TCL_ENCODING_END), + &statePtr->inputEncodingState, buffer, TCL_UTF_MAX + 1, + &read, &decoded, &count); if (count == 2) { if (buffer[1] == '\n') { @@ -6119,7 +6121,6 @@ ReadChars( bufPtr->nextRemoved += srcRead; } - dst[1] = '\0'; statePtr->inputEncodingFlags &= ~TCL_ENCODING_START; Tcl_SetObjLength(objPtr, numBytes + 1); @@ -6166,7 +6167,7 @@ ReadChars( * Tcl_ExternalToUtf() call! */ - dstLimit = Tcl_UtfAtIndex(dst, charsToRead) + TCL_UTF_MAX - dst; + dstLimit = Tcl_UtfAtIndex(dst, charsToRead) - 1 + TCL_UTF_MAX - dst; statePtr->flags = savedFlags; statePtr->inputEncodingFlags = savedIEFlags; statePtr->inputEncodingState = savedState; |