From 1889ded1144a4dbd44d0c6f03e72a01d70115a51 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 11 Mar 2023 22:00:29 +0000 Subject: Proposed fix for [db7a085bd9]: encoding convertfrom -strict utf-16 accepts partial surrogates. TODO: testcases, and implement for 8.7 too --- generic/tclCmdAH.c | 2 +- generic/tclEncoding.c | 30 ++++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 4df1216..ac504d0 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -514,7 +514,7 @@ EncodingConvertfromObjCmd( char buf[TCL_INTEGER_SPACE]; sprintf(buf, "%" TCL_Z_MODIFIER "u", result); Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" - TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); + TCL_Z_MODIFIER "u: '\\x%02X'", result, UCHAR(bytesPtr[result]))); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL); Tcl_DStringFree(&ds); diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index fc3397a..4f334bb 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2603,6 +2603,7 @@ Utf32ToUtfProc( dst += Tcl_UniCharToUtf(-1, dst); } #endif + if ((unsigned)ch > 0x10FFFF) { ch = 0xFFFD; if (STOPONERROR) { @@ -2639,6 +2640,7 @@ Utf32ToUtfProc( dst += Tcl_UniCharToUtf(-1, dst); } #endif + if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { /* We have a single byte left-over at the end */ if (dst > dstEnd) { @@ -2846,6 +2848,13 @@ Utf16ToUtfProc( ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + result = TCL_CONVERT_UNKNOWN; + src -= 2; /* Go back to before the high surrogate */ + dst--; /* Also undo writing a single byte too much */ + numChars--; + break; + } /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } @@ -2855,17 +2864,30 @@ Utf16ToUtfProc( * unsigned short-size data. */ - if (ch && ch < 0x80) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (ch & 0xFF); - } else { + } else if (((prev & ~0x3FF) == 0xD800) || ((ch & ~0x3FF) == 0xD800)) { dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + } else if (((ch & ~0x3FF) == 0xDC00) && ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + /* Lo surrogate not preceded by Hi surrogate */ + result = TCL_CONVERT_UNKNOWN; + break; + } else { + dst += Tcl_UniCharToUtf(ch, dst); } src += sizeof(unsigned short); } if ((ch & ~0x3FF) == 0xD800) { - /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ - dst += Tcl_UniCharToUtf(-1, dst); + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { + result = TCL_CONVERT_UNKNOWN; + src -= 2; + dst--; + numChars--; + } else { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } } if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { /* We have a single byte left-over at the end */ -- cgit v0.12 From eeee744ee2f72edd36c45a3ee07dbbee39f16994 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 16:10:52 +0000 Subject: Minor bug-fix for utf-32: Only throw exception for codepoints > +U10FFFF if "-strict" is specified. Otherwise replace with 0xFFFD --- generic/tclEncoding.c | 12 +++++------- tests/encoding.test | 6 ++++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 4f334bb..a471fe9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2606,19 +2606,17 @@ Utf32ToUtfProc( if ((unsigned)ch > 0x10FFFF) { ch = 0xFFFD; - if (STOPONERROR) { + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { result = TCL_CONVERT_SYNTAX; break; } } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && ((ch & ~0x7FF) == 0xD800)) { - if (STOPONERROR) { - result = TCL_CONVERT_SYNTAX; + result = TCL_CONVERT_SYNTAX; #if TCL_UTF_MAX < 4 - ch = 0; + ch = 0; #endif - break; - } + break; } /* @@ -2850,7 +2848,7 @@ Utf16ToUtfProc( if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { result = TCL_CONVERT_UNKNOWN; - src -= 2; /* Go back to before the high surrogate */ + src -= 2; /* Go back to beginning of high surrogate */ dst--; /* Also undo writing a single byte too much */ numChars--; break; diff --git a/tests/encoding.test b/tests/encoding.test index 68b5dcd..c8f34ba 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -569,6 +569,12 @@ test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -strict utf-16le \x00\xDC } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-16.24 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF" +} -result \uFFFD +test encoding-16.25 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\x01\x00\x00\x01" +} -result \uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" -- cgit v0.12 From aee8588fbcee145de5cf3012f7c0c60277fb5394 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 16:23:33 +0000 Subject: 2 more testcases, proving utf-32 handling of surrogates (actually: not handling!) is OK --- tests/encoding.test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/encoding.test b/tests/encoding.test index c8f34ba..5e8d3f7 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -606,6 +606,12 @@ test encoding-17.9 {Utf32ToUtfProc} -body { test encoding-17.10 {Utf32ToUtfProc} -body { encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD +test encoding-17.11 {Utf32ToUtfProc} -body { + encoding convertfrom -strict utf-32le "\x00\xD8\x00\x00" +} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-17.12 {Utf32ToUtfProc} -body { + encoding convertfrom -strict utf-32le "\x00\xDC\x00\x00" +} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-18.1 {TableToUtfProc on invalid input} -body { list [catch {encoding convertto jis0208 \\} res] $res -- cgit v0.12 From 131176ce2f937173892c6e7e3a78978f6e8da2b5 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 16:37:12 +0000 Subject: Backport [6fb14ee3e876978c]. Add testcases --- generic/tclEncoding.c | 12 +++++------- tests/encoding.test | 12 ++++++++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index b3409d6..27f11d8 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2605,17 +2605,15 @@ Utf32ToUtfProc( if ((unsigned)ch > 0x10FFFF) { ch = 0xFFFD; - if (STOPONERROR) { + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { result = TCL_CONVERT_SYNTAX; break; } } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && ((ch & ~0x7FF) == 0xD800)) { - if (STOPONERROR) { - result = TCL_CONVERT_SYNTAX; - ch = 0; - break; - } + result = TCL_CONVERT_SYNTAX; + ch = 0; + break; } /* @@ -2845,7 +2843,7 @@ Utf16ToUtfProc( if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { result = TCL_CONVERT_UNKNOWN; - src -= 2; /* Go back to before the high surrogate */ + src -= 2; /* Go back to beginning of high surrogate */ dst--; /* Also undo writing a single byte too much */ numChars--; break; diff --git a/tests/encoding.test b/tests/encoding.test index 0fe64ce..cf63211 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -568,6 +568,12 @@ test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -strict utf-16le \x00\xDC } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-16.24 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF" +} -result \uFFFD +test encoding-16.25 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\x01\x00\x00\x01" +} -result \uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" @@ -599,6 +605,12 @@ test encoding-17.9 {Utf32ToUtfProc} -body { test encoding-17.10 {Utf32ToUtfProc} -body { encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD +test encoding-17.11 {Utf32ToUtfProc} -body { + encoding convertfrom -strict utf-32le "\x00\xD8\x00\x00" +} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-17.12 {Utf32ToUtfProc} -body { + encoding convertfrom -strict utf-32le "\x00\xDC\x00\x00" +} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-18.1 {TableToUtfProc on invalid input} -constraints deprecated -body { list [catch {encoding convertto jis0208 \\} res] $res -- cgit v0.12 From 967e55306e7bb0a58a7cf2c5b905a2608f395875 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 13 Mar 2023 12:22:06 +0000 Subject: Fix for issue [ea69b0258a9833cb], crash when using a channel transformation on TCP client socket. --- generic/tclIO.c | 38 ++++++++++++++---------- tests/ioTrans.test | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 17 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 85ff39b..715f8c7 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8551,6 +8551,7 @@ UpdateInterest( mask &= ~TCL_EXCEPTION; if (!statePtr->timer) { + TclChannelPreserve((Tcl_Channel)chanPtr); statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, ChannelTimerProc, chanPtr); } @@ -8584,23 +8585,28 @@ ChannelTimerProc( ChannelState *statePtr = chanPtr->state; /* State info for channel */ - if (!GotFlag(statePtr, CHANNEL_NEED_MORE_DATA) - && (statePtr->interestMask & TCL_READABLE) - && (statePtr->inQueueHead != NULL) - && IsBufferReady(statePtr->inQueueHead)) { - /* - * Restart the timer in case a channel handler reenters the event loop - * before UpdateInterest gets called by Tcl_NotifyChannel. - */ - - statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, - ChannelTimerProc,chanPtr); - Tcl_Preserve(statePtr); - Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE); - Tcl_Release(statePtr); + if (chanPtr->typePtr == NULL) { + TclChannelRelease((Tcl_Channel)chanPtr); } else { - statePtr->timer = NULL; - UpdateInterest(chanPtr); + if (!GotFlag(statePtr, CHANNEL_NEED_MORE_DATA) + && (statePtr->interestMask & TCL_READABLE) + && (statePtr->inQueueHead != NULL) + && IsBufferReady(statePtr->inQueueHead)) { + /* + * Restart the timer in case a channel handler reenters the event loop + * before UpdateInterest gets called by Tcl_NotifyChannel. + */ + + statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, + ChannelTimerProc,chanPtr); + Tcl_Preserve(statePtr); + Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE); + Tcl_Release(statePtr); + } else { + statePtr->timer = NULL; + UpdateInterest(chanPtr); + TclChannelRelease((Tcl_Channel)chanPtr); + } } } diff --git a/tests/ioTrans.test b/tests/ioTrans.test index f185117..130ff80 100644 --- a/tests/ioTrans.test +++ b/tests/ioTrans.test @@ -634,6 +634,58 @@ test iortrans-4.9 {chan read, gets, bug 2921116} -setup { } } + + +namespace eval reflector { + proc initialize {_ chan mode} { + return {initialize finalize watch read} + } + + + proc finalize {_ chan} { + namespace delete $_ + } + + + proc read {_ chan count} { + namespace upvar $_ source source + set res [string range $source 0 $count-1] + set source [string range $source $count end] + return $res + } + + + proc watch {_ chan events} { + after 0 [list chan postevent $chan read] + return read + } + + namespace ensemble create -parameters _ + namespace export * +} + + + + +namespace eval inputfilter { + proc initialize {chan mode} { + return {initialize finalize read} + } + + proc read {chan buffer} { + return $buffer + } + + proc finalize chan { + namespace delete $chan + } + + namespace ensemble create + namespace export * +} + + + # Channel read transform that is just the identity - pass all through proc idxform {cmd handle args} { switch -- $cmd { @@ -2089,7 +2141,39 @@ test iortrans.tf-11.1 {origin thread of moved transform destroyed during access} thread::release $tidb } -result {Owner lost} -# ### ### ### ######### ######### ######### + +test iortrans-ea69b0258a9833cb { + Crash when using a channel transformation on TCP client socket + + "line two" does not make it into result. This issue should probably be + addressed, but it is outside the scope of this test. +} -setup { + set res {} + set read 0 +} -body { + namespace eval reflector1 { + variable source "line one\nline two" + interp alias {} [namespace current]::dispatch {} [ + namespace parent]::reflector [namespace current] + } + set chan [chan create read [namespace which reflector1::dispatch]] + chan configure $chan -blocking 0 + chan push $chan inputfilter + chan event $chan read [list ::apply [list chan { + variable res + variable read + set gets [gets $chan] + append res $gets + incr read + } [namespace current]] $chan] + vwait [namespace current]::read + chan pop $chan + vwait [namespace current]::read + return $res +} -cleanup { + catch {unset read} + close $chan +} -result {line one} cleanupTests return -- cgit v0.12 From df8a3a6ea4a6ede9d9be56eacae69d8f40d624ca Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 13 Mar 2023 13:36:52 +0000 Subject: Fix for issue [ea69b0258a9833cb], crash when using a channel transformation on TCP client socket. --- generic/tclIO.c | 67 +++++++++++++++++++++++++----------------- tests/ioTrans.test | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 125 insertions(+), 28 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index da06171..58137a5 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8804,6 +8804,7 @@ UpdateInterest( mask &= ~TCL_EXCEPTION; if (!statePtr->timer) { + TclChannelPreserve((Tcl_Channel)chanPtr); statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, ChannelTimerProc, chanPtr); } @@ -8814,6 +8815,7 @@ UpdateInterest( && mask & TCL_WRITABLE && GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + TclChannelPreserve((Tcl_Channel)chanPtr); statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, ChannelTimerProc,chanPtr); } @@ -8848,44 +8850,55 @@ ChannelTimerProc( /* State info for channel */ ChannelState *statePtr = chanPtr->state; - /* Preserve chanPtr to guard against deallocation in Tcl_NotifyChannel. */ - TclChannelPreserve((Tcl_Channel)chanPtr); - Tcl_Preserve(statePtr); - statePtr->timer = NULL; - if (statePtr->interestMask & TCL_WRITABLE - && GotFlag(statePtr, CHANNEL_NONBLOCKING) - && !GotFlag(statePtr, BG_FLUSH_SCHEDULED) - ) { - /* - * Restart the timer in case a channel handler reenters the event loop - * before UpdateInterest gets called by Tcl_NotifyChannel. - */ - statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, - ChannelTimerProc,chanPtr); - Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_WRITABLE); - } + /* TclChannelPreserve() must be called before the current function was + * scheduled, is already in effect. In this function it guards against + * deallocation in Tcl_NotifyChannel and also keps the channel preserved + * until ChannelTimerProc is later called again. + */ - /* The channel may have just been closed from within Tcl_NotifyChannel */ - if (!GotFlag(statePtr, CHANNEL_INCLOSE)) { - if (!GotFlag(statePtr, CHANNEL_NEED_MORE_DATA) - && (statePtr->interestMask & TCL_READABLE) - && (statePtr->inQueueHead != NULL) - && IsBufferReady(statePtr->inQueueHead)) { + if (chanPtr->typePtr == NULL) { + TclChannelRelease((Tcl_Channel)chanPtr); + } else { + Tcl_Preserve(statePtr); + statePtr->timer = NULL; + if (statePtr->interestMask & TCL_WRITABLE + && GotFlag(statePtr, CHANNEL_NONBLOCKING) + && !GotFlag(statePtr, BG_FLUSH_SCHEDULED) + ) { /* * Restart the timer in case a channel handler reenters the event loop * before UpdateInterest gets called by Tcl_NotifyChannel. */ - statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, ChannelTimerProc,chanPtr); - Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE); + Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_WRITABLE); } else { - UpdateInterest(chanPtr); + /* The channel may have just been closed from within Tcl_NotifyChannel */ + if (!GotFlag(statePtr, CHANNEL_INCLOSE)) { + if (!GotFlag(statePtr, CHANNEL_NEED_MORE_DATA) + && (statePtr->interestMask & TCL_READABLE) + && (statePtr->inQueueHead != NULL) + && IsBufferReady(statePtr->inQueueHead)) { + /* + * Restart the timer in case a channel handler reenters the event loop + * before UpdateInterest gets called by Tcl_NotifyChannel. + */ + + statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, + ChannelTimerProc,chanPtr); + Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE); + } else { + TclChannelRelease((Tcl_Channel)chanPtr); + UpdateInterest(chanPtr); + } + } else { + TclChannelRelease((Tcl_Channel)chanPtr); + } } + + Tcl_Release(statePtr); } - Tcl_Release(statePtr); - TclChannelRelease((Tcl_Channel)chanPtr); } /* diff --git a/tests/ioTrans.test b/tests/ioTrans.test index 79493e0..f481a17 100644 --- a/tests/ioTrans.test +++ b/tests/ioTrans.test @@ -634,6 +634,58 @@ test iortrans-4.9 {chan read, gets, bug 2921116} -setup { } } + + +namespace eval reflector { + proc initialize {_ chan mode} { + return {initialize finalize watch read} + } + + + proc finalize {_ chan} { + namespace delete $_ + } + + + proc read {_ chan count} { + namespace upvar $_ source source + set res [string range $source 0 $count-1] + set source [string range $source $count end] + return $res + } + + + proc watch {_ chan events} { + after 0 [list chan postevent $chan read] + return read + } + + namespace ensemble create -parameters _ + namespace export * +} + + + + +namespace eval inputfilter { + proc initialize {chan mode} { + return {initialize finalize read} + } + + proc read {chan buffer} { + return $buffer + } + + proc finalize chan { + namespace delete $chan + } + + namespace ensemble create + namespace export * +} + + + # Channel read transform that is just the identity - pass all through proc idxform {cmd handle args} { switch -- $cmd { @@ -2089,7 +2141,39 @@ test iortrans.tf-11.1 {origin thread of moved transform destroyed during access} thread::release $tidb } -result {Owner lost} -# ### ### ### ######### ######### ######### + +test iortrans-ea69b0258a9833cb { + Crash when using a channel transformation on TCP client socket + + "line two" does not make it into result. This issue should probably be + addressed, but it is outside the scope of this test. +} -setup { + set res {} + set read 0 +} -body { + namespace eval reflector1 { + variable source "line one\nline two" + interp alias {} [namespace current]::dispatch {} [ + namespace parent]::reflector [namespace current] + } + set chan [chan create read [namespace which reflector1::dispatch]] + chan configure $chan -blocking 0 + chan push $chan inputfilter + chan event $chan read [list ::apply [list chan { + variable res + variable read + set gets [gets $chan] + append res $gets + incr read + } [namespace current]] $chan] + vwait [namespace current]::read + chan pop $chan + vwait [namespace current]::read + return $res +} -cleanup { + catch {unset read} + close $chan +} -result {line one} cleanupTests return -- cgit v0.12 From 6d7423228211f312016f0c62ce1bc86c3d3777db Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 13 Mar 2023 13:44:35 +0000 Subject: Bug [183a1adcc0]. Buffer overflow in Tcl_UtfToExternal --- generic/tclEncoding.c | 14 +++ generic/tclTest.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++- tests/encoding.test | 35 ++++++++ 3 files changed, 283 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 2b3b614..92217f3 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1233,6 +1233,9 @@ Tcl_ExternalToUtf( } if (!noTerminate) { + if (dstLen < 1) { + return TCL_CONVERT_NOSPACE; + } /* * If there are any null characters in the middle of the buffer, * they will converted to the UTF-8 null character (\xC080). To get @@ -1241,6 +1244,10 @@ Tcl_ExternalToUtf( */ dstLen--; + } else { + if (dstLen < 0) { + return TCL_CONVERT_NOSPACE; + } } do { Tcl_EncodingState savedState = *statePtr; @@ -1415,10 +1422,17 @@ Tcl_UtfToExternal( dstCharsPtr = &dstChars; } + if (dstLen < encodingPtr->nullSize) { + return TCL_CONVERT_NOSPACE; + } dstLen -= encodingPtr->nullSize; result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); + /* + * Buffer is terminated irrespective of result. Not sure this is + * reasonable but keep for historical/compatibility reasons. + */ if (encodingPtr->nullSize == 2) { dst[*dstWrotePtr + 1] = '\0'; } diff --git a/generic/tclTest.c b/generic/tclTest.c index bc51c99..c2b7144 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -1817,6 +1817,234 @@ static void SpecialFree(blockPtr) } /* + *------------------------------------------------------------------------ + * + * UtfTransformFn -- + * + * Implements a direct call into Tcl_UtfToExternal and Tcl_ExternalToUtf + * as otherwise there is no script level command that directly exercises + * these functions (i/o command cannot test all combinations) + * The arguments at the script level are roughly those of the above + * functions: + * encodingname srcbytes flags state dstlen ?srcreadvar? ?dstwrotevar? ?dstcharsvar? + * + * Results: + * TCL_OK or TCL_ERROR. This any errors running the test, NOT the + * result of Tcl_UtfToExternal or Tcl_ExternalToUtf. + * + * Side effects: + * + * The result in the interpreter is a list of the return code from the + * Tcl_UtfToExternal/Tcl_ExternalToUtf functions, the encoding state, and + * an encoded binary string of length dstLen. Note the string is the + * entire output buffer, not just the part containing the decoded + * portion. This allows for additional checks at test script level. + * + * If any of the srcreadvar, dstwrotevar and + * dstcharsvar are specified and not empty, they are treated as names + * of variables where the *srcRead, *dstWrote and *dstChars output + * from the functions are stored. + * + * The function also checks internally whether nuls are correctly + * appended as requested but the TCL_ENCODING_NO_TERMINATE flag + * and that no buffer overflows occur. + *------------------------------------------------------------------------ + */ +typedef int +UtfTransformFn(Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, + char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); +static int UtfExtWrapper( + Tcl_Interp *interp, UtfTransformFn *transformer, int objc, Tcl_Obj *const objv[]) +{ + Tcl_Encoding encoding; + Tcl_EncodingState encState, *encStatePtr; + int srcLen, bufLen; + const char *bytes; + char *bufPtr; + int srcRead, dstLen, dstWrote, dstChars; + Tcl_Obj *srcReadVar, *dstWroteVar, *dstCharsVar; + int result; + int flags; + Tcl_Obj **flagObjs; + int nflags; + + if (objc < 7 || objc > 10) { + Tcl_WrongNumArgs(interp, + 2, + objv, + "encoding srcbytes flags state dstlen ?srcreadvar? ?dstwrotevar? ?dstcharsvar?"); + return TCL_ERROR; + } + if (Tcl_GetEncodingFromObj(interp, objv[2], &encoding) != TCL_OK) { + return TCL_ERROR; + } + + /* Flags may be specified as list of integers and keywords */ + flags = 0; + if (Tcl_ListObjGetElements(interp, objv[4], &nflags, &flagObjs) != TCL_OK) { + return TCL_ERROR; + } + + struct { + const char *flagKey; + int flag; + } flagMap[] = { + {"start", TCL_ENCODING_START}, + {"end", TCL_ENCODING_END}, + {"stoponerror", TCL_ENCODING_STOPONERROR}, + {"noterminate", TCL_ENCODING_NO_TERMINATE}, + {"charlimit", TCL_ENCODING_CHAR_LIMIT}, + {NULL, 0} + }; + int i; + for (i = 0; i < nflags; ++i) { + int flag; + if (Tcl_GetIntFromObj(NULL, flagObjs[i], &flag) == TCL_OK) { + flags |= flag; + } + else { + int idx; + if (Tcl_GetIndexFromObjStruct(interp, + flagObjs[i], + flagMap, + sizeof(flagMap[0]), + "flag", + 0, + &idx) != TCL_OK) { + return TCL_ERROR; + } + flags |= flagMap[idx].flag; + } + } + + /* Assumes state is integer if not "" */ + Tcl_WideInt wide; + if (Tcl_GetWideIntFromObj(interp, objv[5], &wide) == TCL_OK) { + encState = (Tcl_EncodingState) wide; + encStatePtr = &encState; + } else if (Tcl_GetCharLength(objv[5]) == 0) { + encStatePtr = NULL; + } else { + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, objv[6], &dstLen) != TCL_OK) { + return TCL_ERROR; + } + srcReadVar = NULL; + dstWroteVar = NULL; + dstCharsVar = NULL; + if (objc > 7) { + /* Has caller requested srcRead? */ + if (Tcl_GetCharLength(objv[7])) { + srcReadVar = objv[7]; + } + if (objc > 8) { + /* Ditto for dstWrote */ + if (Tcl_GetCharLength(objv[8])) { + dstWroteVar = objv[8]; + } + if (objc > 9) { + if (Tcl_GetCharLength(objv[9])) { + dstCharsVar = objv[9]; + } + } + } + } + if (flags & TCL_ENCODING_CHAR_LIMIT) { + /* Caller should have specified the dest char limit */ + Tcl_Obj *valueObj; + if (dstCharsVar == NULL || + (valueObj = Tcl_ObjGetVar2(interp, dstCharsVar, NULL, 0)) == NULL + ) { + Tcl_SetResult(interp, + "dstCharsVar must be specified with integer value if " + "TCL_ENCODING_CHAR_LIMIT set in flags.", TCL_STATIC); + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, valueObj, &dstChars) != TCL_OK) { + return TCL_ERROR; + } + } else { + dstChars = 0; /* Only used for output */ + } + + bufLen = dstLen + 4; /* 4 -> overflow detection */ + bufPtr = ckalloc(bufLen); + memset(bufPtr, 0xFF, dstLen); /* Need to check nul terminator */ + memmove(bufPtr + dstLen, "\xAB\xCD\xEF\xAB", 4); /* overflow detection */ + bytes = (char *) Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ + result = (*transformer)(interp, encoding, bytes, srcLen, flags, + encStatePtr, bufPtr, dstLen, + srcReadVar ? &srcRead : NULL, + &dstWrote, + dstCharsVar ? &dstChars : NULL); + if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\xAB", 4)) { + Tcl_SetResult(interp, + "Tcl_ExternalToUtf wrote past output buffer", + TCL_STATIC); + result = TCL_ERROR; + } else if (result != TCL_ERROR) { + Tcl_Obj *resultObjs[3]; + switch (result) { + case TCL_OK: + resultObjs[0] = Tcl_NewStringObj("ok", -1); + break; + case TCL_CONVERT_MULTIBYTE: + resultObjs[0] = Tcl_NewStringObj("multibyte", -1); + break; + case TCL_CONVERT_SYNTAX: + resultObjs[0] = Tcl_NewStringObj("syntax", -1); + break; + case TCL_CONVERT_UNKNOWN: + resultObjs[0] = Tcl_NewStringObj("unknown", -1); + break; + case TCL_CONVERT_NOSPACE: + resultObjs[0] = Tcl_NewStringObj("nospace", -1); + break; + default: + resultObjs[0] = Tcl_NewIntObj(result); + break; + } + result = TCL_OK; + resultObjs[1] = + encStatePtr ? Tcl_NewWideIntObj((Tcl_WideInt)encState) : Tcl_NewObj(); + resultObjs[2] = Tcl_NewByteArrayObj((unsigned char *)bufPtr, dstLen); + if (srcReadVar) { + if (Tcl_ObjSetVar2(interp, + srcReadVar, + NULL, + Tcl_NewIntObj(srcRead), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } + if (dstWroteVar) { + if (Tcl_ObjSetVar2(interp, + dstWroteVar, + NULL, + Tcl_NewIntObj(dstWrote), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } + if (dstCharsVar) { + if (Tcl_ObjSetVar2(interp, + dstCharsVar, + NULL, + Tcl_NewIntObj(dstChars), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } + Tcl_SetObjResult(interp, Tcl_NewListObj(3, resultObjs)); + } + + ckfree(bufPtr); + Tcl_FreeEncoding(encoding); /* Free returned reference */ + return result; +} + +/* *---------------------------------------------------------------------- * * TestencodingCmd -- @@ -1845,10 +2073,10 @@ TestencodingObjCmd( const char *string; TclEncoding *encodingPtr; static const char *const optionStrings[] = { - "create", "delete", NULL + "create", "delete", "Tcl_ExternalToUtf", "Tcl_UtfToExternal", NULL }; enum options { - ENC_CREATE, ENC_DELETE + ENC_CREATE, ENC_DELETE, ENC_EXTTOUTF, ENC_UTFTOEXT }; if (Tcl_GetIndexFromObj(interp, objv[1], optionStrings, "option", 0, @@ -1894,6 +2122,10 @@ TestencodingObjCmd( Tcl_FreeEncoding(encoding); Tcl_FreeEncoding(encoding); break; + case ENC_EXTTOUTF: + return UtfExtWrapper(interp,Tcl_ExternalToUtf,objc,objv); + case ENC_UTFTOEXT: + return UtfExtWrapper(interp,Tcl_UtfToExternal,objc,objv); } return TCL_OK; } diff --git a/tests/encoding.test b/tests/encoding.test index f6f9abc..26efb19 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -739,6 +739,41 @@ test encoding-28.0 {all encodings load} -body { runtests +test encoding-bug-183a1adcc0-1 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 1} result] $result +} -result [list 0 [list nospace {} \xff]] + +test encoding-bug-183a1adcc0-2 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 0} result] $result +} -result [list 0 [list nospace {} {}]] + +test encoding-bug-183a1adcc0-3 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 2} result] $result +} -result [list 0 [list nospace {} \x00\x00]] + +test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 3} result] $result +} -result [list 0 [list nospace {} \x00\x00\xff]] + +test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 4} result] $result +} -result [list 0 [list ok {} [expr {$::tcl_platform(byteOrder) eq "littleEndian" ? "\x41\x00" : "\x00\x41"}]\x00\x00]] + } # cleanup -- cgit v0.12 From 18a99f2522b77516b62a0d44dca1c90b3479bda1 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 14 Mar 2023 10:05:24 +0000 Subject: Add "ucs-2" constraint to encoding-bug-183a1adcc0-5 testcase, otherwise it fails with TCL_UTF_MAX>3. Broken by [47857515422b8519|this] commit --- tests/encoding.test | 2 +- tests/ioTrans.test | 2 +- win/tclWinTest.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index 26efb19..bac80c9 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -768,7 +768,7 @@ test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExtern } -result [list 0 [list nospace {} \x00\x00\xff]] test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { - testencoding + testencoding ucs-2 } -body { # Note - buffers are initialized to \xff list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 4} result] $result diff --git a/tests/ioTrans.test b/tests/ioTrans.test index 130ff80..3a23e61 100644 --- a/tests/ioTrans.test +++ b/tests/ioTrans.test @@ -671,7 +671,7 @@ namespace eval inputfilter { proc initialize {chan mode} { return {initialize finalize read} } - + proc read {chan buffer} { return $buffer } diff --git a/win/tclWinTest.c b/win/tclWinTest.c index d70d217..6ca49f6 100644 --- a/win/tclWinTest.c +++ b/win/tclWinTest.c @@ -419,7 +419,7 @@ TestplatformChmod( const char *nativePath, int pmode) { - /* + /* * Note FILE_DELETE_CHILD missing from dirWriteMask because we do * not want overriding of child's delete setting when testing */ @@ -427,7 +427,7 @@ TestplatformChmod( FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | FILE_ADD_FILE | FILE_ADD_SUBDIRECTORY | STANDARD_RIGHTS_WRITE | DELETE | SYNCHRONIZE; - static const DWORD dirReadMask = + static const DWORD dirReadMask = FILE_READ_ATTRIBUTES | FILE_READ_EA | FILE_LIST_DIRECTORY | STANDARD_RIGHTS_READ | SYNCHRONIZE; /* Note - default user privileges allow ignoring TRAVERSE setting */ @@ -437,7 +437,7 @@ TestplatformChmod( static const DWORD fileWriteMask = FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | FILE_WRITE_DATA | FILE_APPEND_DATA | STANDARD_RIGHTS_WRITE | DELETE | SYNCHRONIZE; - static const DWORD fileReadMask = + static const DWORD fileReadMask = FILE_READ_ATTRIBUTES | FILE_READ_EA | FILE_READ_DATA | STANDARD_RIGHTS_READ | SYNCHRONIZE; static const DWORD fileExecuteMask = @@ -471,7 +471,7 @@ TestplatformChmod( if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken)) { goto done; } - + /* Get process SID */ if (!GetTokenInformation(hToken, TokenUser, NULL, 0, &dw) && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { @@ -489,7 +489,7 @@ TestplatformChmod( ckfree(aceEntry[nSids].pSid); /* Since we have not ++'ed nSids */ goto done; } - /* + /* * Always include DACL modify rights so we don't get locked out */ aceEntry[nSids].mask = READ_CONTROL | WRITE_DAC | WRITE_OWNER | SYNCHRONIZE | -- cgit v0.12