diff options
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- | generic/tclEncoding.c | 84 |
1 files changed, 63 insertions, 21 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 95c59c0..179ca17 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1206,7 +1206,10 @@ Tcl_ExternalToUtf( * output buffer. */ { const Encoding *encodingPtr; - int result, srcRead, dstWrote, dstChars; + int result, srcRead, dstWrote, dstChars = 0; + int noTerminate = flags & TCL_ENCODING_NO_TERMINATE; + int charLimited = (flags & TCL_ENCODING_CHAR_LIMIT) && dstCharsPtr; + int maxChars = INT_MAX; Tcl_EncodingState state; if (encoding == NULL) { @@ -1231,19 +1234,40 @@ Tcl_ExternalToUtf( } if (dstCharsPtr == NULL) { dstCharsPtr = &dstChars; + flags &= ~TCL_ENCODING_CHAR_LIMIT; + } else if (charLimited) { + maxChars = *dstCharsPtr; } - /* - * If there are any null characters in the middle of the buffer, they will - * converted to the UTF-8 null character (\xC080). To get the actual \0 at - * the end of the destination buffer, we need to append it manually. - */ + if (!noTerminate) { + /* + * If there are any null characters in the middle of the buffer, + * they will converted to the UTF-8 null character (\xC080). To get + * the actual \0 at the end of the destination buffer, we need to + * append it manually. First make room for it... + */ - dstLen--; - result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, - flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, - dstCharsPtr); - dst[*dstWrotePtr] = '\0'; + dstLen--; + } + do { + int savedFlags = flags; + Tcl_EncodingState savedState = *statePtr; + + result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, + flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, + dstCharsPtr); + if (*dstCharsPtr <= maxChars) { + break; + } + dstLen = Tcl_UtfAtIndex(dst, maxChars) - 1 - dst + TCL_UTF_MAX; + flags = savedFlags; + *statePtr = savedState; + } while (1); + if (!noTerminate) { + /* ...and then append it */ + + dst[*dstWrotePtr] = '\0'; + } return result; } @@ -2107,6 +2131,9 @@ BinaryProc( if (dstLen < 0) { dstLen = 0; } + if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) { + srcLen = *dstCharsPtr; + } if (srcLen > dstLen) { srcLen = dstLen; result = TCL_CONVERT_NOSPACE; @@ -2267,7 +2294,7 @@ UtfToUtfProc( { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; - int result, numChars; + int result, numChars, charLimit = INT_MAX; Tcl_UniChar ch; result = TCL_OK; @@ -2278,11 +2305,14 @@ UtfToUtfProc( if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } + if (flags & TCL_ENCODING_CHAR_LIMIT) { + charLimit = *dstCharsPtr; + } dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; - for (numChars = 0; src < srcEnd; numChars++) { + for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { /* * If there is more string to follow, this will ensure that the @@ -2378,9 +2408,12 @@ UnicodeToUtfProc( { const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; - int result, numChars; + int result, numChars, charLimit = INT_MAX; Tcl_UniChar ch; + if (flags & TCL_ENCODING_CHAR_LIMIT) { + charLimit = *dstCharsPtr; + } result = TCL_OK; if ((srcLen % sizeof(Tcl_UniChar)) != 0) { result = TCL_CONVERT_MULTIBYTE; @@ -2394,7 +2427,7 @@ UnicodeToUtfProc( dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; - for (numChars = 0; src < srcEnd; numChars++) { + for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; @@ -2562,12 +2595,15 @@ TableToUtfProc( { const char *srcStart, *srcEnd; const char *dstEnd, *dstStart, *prefixBytes; - int result, byte, numChars; + int result, byte, numChars, charLimit = INT_MAX; Tcl_UniChar ch; const unsigned short *const *toUnicode; const unsigned short *pageZero; TableEncodingData *dataPtr = clientData; + if (flags & TCL_ENCODING_CHAR_LIMIT) { + charLimit = *dstCharsPtr; + } srcStart = src; srcEnd = src + srcLen; @@ -2579,7 +2615,7 @@ TableToUtfProc( pageZero = toUnicode[0]; result = TCL_OK; - for (numChars = 0; src < srcEnd; numChars++) { + for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; @@ -2793,8 +2829,11 @@ Iso88591ToUtfProc( { const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; - int result, numChars; + int result, numChars, charLimit = INT_MAX; + if (flags & TCL_ENCODING_CHAR_LIMIT) { + charLimit = *dstCharsPtr; + } srcStart = src; srcEnd = src + srcLen; @@ -2802,7 +2841,7 @@ Iso88591ToUtfProc( dstEnd = dst + dstLen - TCL_UTF_MAX; result = TCL_OK; - for (numChars = 0; src < srcEnd; numChars++) { + for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { Tcl_UniChar ch; if (dst > dstEnd) { @@ -3018,9 +3057,12 @@ EscapeToUtfProc( const char *prefixBytes, *tablePrefixBytes, *srcStart, *srcEnd; const unsigned short *const *tableToUnicode; const Encoding *encodingPtr; - int state, result, numChars; + int state, result, numChars, charLimit = INT_MAX; const char *dstStart, *dstEnd; + if (flags & TCL_ENCODING_CHAR_LIMIT) { + charLimit = *dstCharsPtr; + } result = TCL_OK; tablePrefixBytes = NULL; /* lint. */ tableToUnicode = NULL; /* lint. */ @@ -3038,7 +3080,7 @@ EscapeToUtfProc( state = 0; } - for (numChars = 0; src < srcEnd; ) { + for (numChars = 0; src < srcEnd && numChars <= charLimit; ) { int byte, hi, lo, ch; if (dst > dstEnd) { |