diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-05-09 15:43:04 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-05-09 15:43:04 (GMT) |
commit | 6701945b8164a4d422b31aec96cac78a204b3804 (patch) | |
tree | 0902740fe620ed58e010ba3231734b6e2ea62d0b /generic/tclEncoding.c | |
parent | 91c305a5f3924fdd07b574ce025113cec013fd06 (diff) | |
parent | e95d2eea7fc037bb5d2f5cd1cc3bb47211b2fd2d (diff) | |
download | tcl-6701945b8164a4d422b31aec96cac78a204b3804.zip tcl-6701945b8164a4d422b31aec96cac78a204b3804.tar.gz tcl-6701945b8164a4d422b31aec96cac78a204b3804.tar.bz2 |
Merge 9.0
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- | generic/tclEncoding.c | 66 |
1 file changed, 60 insertions, 6 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d13c923..1a8fd84 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -10,7 +10,6 @@ */ #include "tclInt.h" -#include "tclIO.h" typedef size_t (LengthProc)(const char *src); @@ -1159,7 +1158,7 @@ Tcl_ExternalToUtfDString( * Tcl_ExternalToUtfDStringEx -- * * Convert a source buffer from the specified encoding into UTF-8. - * The parameter flags controls the behavior, if any of the bytes in + * "flags" controls the behavior if any of the bytes in * the source buffer are invalid or cannot be represented in utf-8. * Possible flags values: * target encoding. It should be composed by OR-ing the following: @@ -2517,6 +2516,16 @@ UtfToUtfProc( flags |= PTR2INT(clientData); dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6); + +#if TCL_UTF_MAX < 4 + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(dst, 0xff, dstLen); +#endif + profile = ENCODING_PROFILE_GET(flags); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { @@ -2564,10 +2573,10 @@ UtfToUtfProc( } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Incomplete byte sequence. - * Always check before using TclUtfToUCS4. Not doing can so - * cause it run beyond the end of the buffer! If we happen such an - * incomplete char its bytes are made to represent themselves - * unless the user has explicitly asked to be told. + * Always check before using TclUtfToUCS4. Not doing so can cause it + * run beyond the end of the buffer! If we happen on such an incomplete + * char its bytes are made to represent themselves unless the user has + * explicitly asked to be told. 
*/ if (flags & ENCODING_INPUT) { @@ -2730,6 +2739,15 @@ Utf32ToUtfProc( } result = TCL_OK; +#if TCL_UTF_MAX < 4 + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(dst, 0xff, dstLen); +#endif + /* * Check alignment with utf-32 (4 == sizeof(UTF-32)) */ @@ -2997,6 +3015,15 @@ Utf16ToUtfProc( } result = TCL_OK; +#if TCL_UTF_MAX < 4 + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(dst, 0xff, dstLen); +#endif + /* * Check alignment with utf-16 (2 == sizeof(UTF-16)) */ @@ -3407,6 +3434,15 @@ TableToUtfProc( dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; +#if TCL_UTF_MAX < 4 + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(dst, 0xff, dstLen); +#endif + toUnicode = (const unsigned short *const *) dataPtr->toUnicode; prefixBytes = dataPtr->prefixBytes; pageZero = toUnicode[0]; @@ -3646,6 +3682,15 @@ Iso88591ToUtfProc( dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; +#if TCL_UTF_MAX < 4 + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. 
+ */ + memset(dst, 0xff, dstLen); +#endif + result = TCL_OK; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { Tcl_UniChar ch = 0; @@ -3883,6 +3928,15 @@ EscapeToUtfProc( dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; +#if TCL_UTF_MAX < 4 + /* Initialize the buffer so that some random data doesn't trick + * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs. + * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its + * prior non-stateful nature, this call to memset can also be removed. + */ + memset(dst, 0xff, dstLen); +#endif + state = PTR2INT(*statePtr); if (flags & TCL_ENCODING_START) { state = 0; |