From 2a5b403768444ddf2d6379ffe3644e9d5b230e19 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 16:29:09 +0000 Subject: Experimental fix for [fb368527ae] - length truncation --- generic/tclEncoding.c | 86 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 19 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ce5626f..3a39966 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1160,8 +1160,8 @@ Tcl_ExternalToUtfDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int result, soFar, srcRead, dstWrote, dstChars; - Tcl_Size dstLen; + int result; + Tcl_Size dstLen, soFar; const char *srcStart = src; Tcl_DStringInit(dstPtr); @@ -1179,23 +1179,47 @@ Tcl_ExternalToUtfDStringEx( srcLen = encodingPtr->lengthProc(src); } - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= ENCODING_INPUT; } while (1) { - result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, - flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); - soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + int srcChunkLen, srcChunkRead; + int dstChunkLen, dstChunkWrote, dstChunkChars; + + if (srcLen > INT_MAX) { + srcChunkLen = INT_MAX; + } else { + srcChunkLen = srcLen; + flags |= TCL_ENCODING_END; /* Last chunk */ + } + dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen; + + result = encodingPtr->toUtfProc(encodingPtr->clientData, src, + srcChunkLen, flags, &state, dst, dstChunkLen, + &srcChunkRead, &dstChunkWrote, &dstChunkChars); + soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { + src += srcChunkRead; + srcLen -= srcChunkRead; + + /* + * Keep looping in two case - + * - our destination buffer did not have enough room + * - we had not passed in all the data and error indicated fragment + * of a multibyte character + * In both cases we have to grow buffer, move the input source pointer + * and loop. Otherwise, return the result we got. + */ + if ((result != TCL_CONVERT_NOSPACE) && + !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { Tcl_DStringSetLength(dstPtr, soFar); return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); } + flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; + if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); } @@ -1398,9 +1422,9 @@ Tcl_UtfToExternalDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int result, soFar, srcRead, dstWrote, dstChars; + int result; + Tcl_Size dstLen, soFar; const char *srcStart = src; - Tcl_Size dstLen; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1416,16 +1440,40 @@ Tcl_UtfToExternalDStringEx( } else if (srcLen == TCL_INDEX_NONE) { srcLen = strlen(src); } - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; while (1) { + int srcChunkLen, srcChunkRead; + int dstChunkLen, dstChunkWrote, dstChunkChars; + + if (srcLen > INT_MAX) { + srcChunkLen = INT_MAX; + } else { + srcChunkLen = srcLen; + flags |= TCL_ENCODING_END; /* Last chunk */ + } + dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen; + result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, - &srcRead, &dstWrote, &dstChars); - soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + srcChunkLen, flags, &state, dst, dstChunkLen, + &srcChunkRead, &dstChunkWrote, &dstChunkChars); + soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { - int i = soFar + encodingPtr->nullSize - 1; + /* Move past the part processed in this go around */ + src += srcChunkRead; + srcLen -= srcChunkRead; + + /* + * Keep looping in two case - + * - our destination buffer did not have enough room + * - we had not passed in all the data and error indicated fragment + * of a multibyte character + * In both cases we have to grow buffer, move the input source pointer + * and loop. Otherwise, return the result we got. + */ + if ((result != TCL_CONVERT_NOSPACE) && + !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { + size_t i = soFar + encodingPtr->nullSize - 1; + /* Loop as DStringSetLength only stores one nul byte at a time */ while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } @@ -1433,7 +1481,7 @@ Tcl_UtfToExternalDStringEx( } flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; + if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); } -- cgit v0.12 From 5ffda39949b785859a8ab5b9b4977536dde6f9f2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 12:56:18 +0000 Subject: Move the "srcLen -= srcChunkRead;" past the "if ((result != TCL_CONVERT_NOSPACE)..." (where it originally was), since this isn't needed if the loop ends anyway. --- generic/tclEncoding.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 3a39966..a6ecc26 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1187,7 +1187,7 @@ Tcl_ExternalToUtfDStringEx( while (1) { int srcChunkLen, srcChunkRead; int dstChunkLen, dstChunkWrote, dstChunkChars; - + if (srcLen > INT_MAX) { srcChunkLen = INT_MAX; } else { @@ -1202,7 +1202,6 @@ Tcl_ExternalToUtfDStringEx( soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); src += srcChunkRead; - srcLen -= srcChunkRead; /* * Keep looping in two case - @@ -1210,7 +1209,7 @@ Tcl_ExternalToUtfDStringEx( * - we had not passed in all the data and error indicated fragment * of a multibyte character * In both cases we have to grow buffer, move the input source pointer - * and loop. Otherwise, return the result we got. + * and loop. Otherwise, return the result we got. */ if ((result != TCL_CONVERT_NOSPACE) && !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { @@ -1219,6 +1218,7 @@ Tcl_ExternalToUtfDStringEx( } flags &= ~TCL_ENCODING_START; + srcLen -= srcChunkRead; if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); @@ -1460,7 +1460,6 @@ Tcl_UtfToExternalDStringEx( /* Move past the part processed in this go around */ src += srcChunkRead; - srcLen -= srcChunkRead; /* * Keep looping in two case - @@ -1468,7 +1467,7 @@ Tcl_UtfToExternalDStringEx( * - we had not passed in all the data and error indicated fragment * of a multibyte character * In both cases we have to grow buffer, move the input source pointer - * and loop. Otherwise, return the result we got. + * and loop. Otherwise, return the result we got. */ if ((result != TCL_CONVERT_NOSPACE) && !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { @@ -1481,6 +1480,7 @@ Tcl_UtfToExternalDStringEx( } flags &= ~TCL_ENCODING_START; + srcLen -= srcChunkRead; if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); -- cgit v0.12 From e2d89615d52e47ed3b683498567e058e809aea39 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 27 Feb 2023 04:15:07 +0000 Subject: Tests for encoding strings > 4GB (under perf constraint) --- tests/encoding.test | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/encoding.test b/tests/encoding.test index e0e1598..8b14353 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -1046,6 +1046,32 @@ test encoding-29.0 {get encoding nul terminator lengths} -constraints { [testencoding nullength ksc5601] } -result {1 2 4 2 2} +test encoding-30.0 {encoding convertto large strings UINT_MAX} -constraints { + perf +} -body { + # Test to ensure not misinterpreted as -1 + list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertto ascii $s]] +} -result {4294967295 1} + +test encoding-30.1 {encoding convertto large strings > 4GB} -constraints { + perf +} -body { + list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertto ascii $s]] +} -result {4294967296 1} + +test encoding-30.2 {encoding convertfrom large strings UINT_MAX} -constraints { + perf +} -body { + # Test to ensure not misinterpreted as -1 + list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertfrom ascii $s]] +} -result {4294967295 1} + +test encoding-30.3 {encoding convertfrom large strings > 4GB} -constraints { + perf +} -body { + list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertfrom ascii $s]] +} -result {4294967296 1} + # cleanup namespace delete ::tcl::test::encoding -- cgit v0.12 From 85bf0db1e84ab483fce7962c151bedeb3f5e0993 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 27 Feb 2023 12:31:49 +0000 Subject: Fix crash. int->size_t needs +1 in comparisons. --- generic/tclEncoding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index a6ecc26..d0756c7 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1473,7 +1473,7 @@ Tcl_UtfToExternalDStringEx( !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { size_t i = soFar + encodingPtr->nullSize - 1; /* Loop as DStringSetLength only stores one nul byte at a time */ - while (i >= soFar) { + while (i+1 >= soFar+1) { Tcl_DStringSetLength(dstPtr, i--); } return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); -- cgit v0.12