From 76130df46050131c3a1c4ec22d6adbfaa637f2a7 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 25 Apr 2023 20:34:03 +0000 Subject: Fix for issue [f5eadcbf9a], passing pointer to uninitialized memory leads Tcl_UniCharToUtf() to corrupt data. --- generic/tclEncoding.c | 43 +++++++++++++++++++++++++++++++++++++++++++ generic/tclStringObj.c | 6 ++++++ generic/tclUtf.c | 16 ++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 2b8e8c0..851ae64 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2532,6 +2532,14 @@ UtfToUtfProc( flags |= PTR2INT(clientData); dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6); + + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(dst, 0, dstLen); + profile = CHANNEL_PROFILE_GET(flags); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { @@ -2746,6 +2754,13 @@ Utf32ToUtfProc( } result = TCL_OK; + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(dst, 0, dstLen); + /* * Check alignment with utf-32 (4 == sizeof(UTF-32)) */ @@ -3015,6 +3030,13 @@ Utf16ToUtfProc( } result = TCL_OK; + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. 
+ */ + memset(dst, 0, dstLen); + /* * Check alignment with utf-16 (2 == sizeof(UTF-16)) */ @@ -3428,6 +3450,13 @@ TableToUtfProc( dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(dst, 0, dstLen); + toUnicode = (const unsigned short *const *) dataPtr->toUnicode; prefixBytes = dataPtr->prefixBytes; pageZero = toUnicode[0]; @@ -3669,6 +3698,13 @@ Iso88591ToUtfProc( dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(dst, 0, dstLen); + result = TCL_OK; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { Tcl_UniChar ch = 0; @@ -3908,6 +3944,13 @@ EscapeToUtfProc( dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. 
+ */ + memset(dst, 0, dstLen); + state = PTR2INT(*statePtr); if (flags & TCL_ENCODING_START) { state = 0; diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index fb7294b..3b1f0fb 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -4497,6 +4497,12 @@ ExtendStringRepWithUnicode( copyBytes: dst = objPtr->bytes + origLength; + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(dst, 0, stringPtr->allocated - origLength); for (i = 0; i < numChars; i++) { dst += Tcl_UniCharToUtf(unicode[i], dst); } diff --git a/generic/tclUtf.c b/generic/tclUtf.c index cc5769f..42d2bea 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -348,6 +348,14 @@ Tcl_UniCharToUtfDString( p = string; wEnd = uniStr + uniLength; + + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(p, 0, Tcl_DStringLength(dsPtr) - oldLength); + for (w = uniStr; w < wEnd; ) { p += Tcl_UniCharToUtf(*w, p); w++; @@ -391,6 +399,14 @@ Tcl_Char16ToUtfDString( p = string; wEnd = uniStr + uniLength; + + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Because TCL_COMBINE is used here, memset() is required even when + * TCL_UTF_MAX == 4. + */ + memset(p, 0, Tcl_DStringLength(dsPtr) - oldLength); + for (w = uniStr; w < wEnd; ) { if (!len && ((*w & 0xFC00) != 0xDC00)) { /* Special case for handling high surrogates. 
*/ -- cgit v0.12 From 7f433932f67e088cdf21d42138a2fd4c96620ef0 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Thu, 27 Apr 2023 20:34:22 +0000 Subject: memset(0xff) instead of memset(0) to accommodate tests that fill buffer with 0xff. --- generic/tclEncoding.c | 12 ++++++------ generic/tclStringObj.c | 2 +- generic/tclUtf.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 851ae64..abce00b 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2538,7 +2538,7 @@ UtfToUtfProc( * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its * prior non-stateful nature, this call to memset can also be removed. */ - memset(dst, 0, dstLen); + memset(dst, 0xff, dstLen); profile = CHANNEL_PROFILE_GET(flags); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { @@ -2759,7 +2759,7 @@ Utf32ToUtfProc( * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its * prior non-stateful nature, this call to memset can also be removed. */ - memset(dst, 0, dstLen); + memset(dst, 0xff, dstLen); /* * Check alignment with utf-32 (4 == sizeof(UTF-32)) @@ -3035,7 +3035,7 @@ Utf16ToUtfProc( * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its * prior non-stateful nature, this call to memset can also be removed. */ - memset(dst, 0, dstLen); + memset(dst, 0xff, dstLen); /* * Check alignment with utf-16 (2 == sizeof(UTF-16)) @@ -3455,7 +3455,7 @@ TableToUtfProc( * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its * prior non-stateful nature, this call to memset can also be removed. */ - memset(dst, 0, dstLen); + memset(dst, 0xff, dstLen); toUnicode = (const unsigned short *const *) dataPtr->toUnicode; prefixBytes = dataPtr->prefixBytes; @@ -3703,7 +3703,7 @@ Iso88591ToUtfProc( * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its * prior non-stateful nature, this call to memset can also be removed. 
*/ - memset(dst, 0, dstLen); + memset(dst, 0xff, dstLen); result = TCL_OK; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { @@ -3949,7 +3949,7 @@ EscapeToUtfProc( * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its * prior non-stateful nature, this call to memset can also be removed. */ - memset(dst, 0, dstLen); + memset(dst, 0xff, dstLen); state = PTR2INT(*statePtr); if (flags & TCL_ENCODING_START) { diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 3b1f0fb..6bc1c18 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -4502,7 +4502,7 @@ ExtendStringRepWithUnicode( * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its * prior non-stateful nature, this call to memset can also be removed. */ - memset(dst, 0, stringPtr->allocated - origLength); + memset(dst, 0xff, stringPtr->allocated - origLength); for (i = 0; i < numChars; i++) { dst += Tcl_UniCharToUtf(unicode[i], dst); } diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 42d2bea..54cef2f 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -354,7 +354,7 @@ Tcl_UniCharToUtfDString( * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its * prior non-stateful nature, this call to memset can also be removed. */ - memset(p, 0, Tcl_DStringLength(dsPtr) - oldLength); + memset(p, 0xff, Tcl_DStringLength(dsPtr) - oldLength); for (w = uniStr; w < wEnd; ) { p += Tcl_UniCharToUtf(*w, p); @@ -405,7 +405,7 @@ Tcl_Char16ToUtfDString( * Because TCL_COMBINE is used here, memset() is required even when * TCL_UTF_MAX == 4. */ - memset(p, 0, Tcl_DStringLength(dsPtr) - oldLength); + memset(p, 0xff, Tcl_DStringLength(dsPtr) - oldLength); for (w = uniStr; w < wEnd; ) { if (!len && ((*w & 0xFC00) != 0xDC00)) { -- cgit v0.12