summary | refs | log | tree | commit | diff | stats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2023-05-09 15:43:04 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2023-05-09 15:43:04 (GMT)
commit 6701945b8164a4d422b31aec96cac78a204b3804 (patch)
tree 0902740fe620ed58e010ba3231734b6e2ea62d0b /generic/tclEncoding.c
parent 91c305a5f3924fdd07b574ce025113cec013fd06 (diff)
parent e95d2eea7fc037bb5d2f5cd1cc3bb47211b2fd2d (diff)
download tcl-6701945b8164a4d422b31aec96cac78a204b3804.zip
tcl-6701945b8164a4d422b31aec96cac78a204b3804.tar.gz
tcl-6701945b8164a4d422b31aec96cac78a204b3804.tar.bz2
Merge 9.0
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- generic/tclEncoding.c 66
1 files changed, 60 insertions, 6 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index d13c923..1a8fd84 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -10,7 +10,6 @@
*/
#include "tclInt.h"
-#include "tclIO.h"
typedef size_t (LengthProc)(const char *src);
@@ -1159,7 +1158,7 @@ Tcl_ExternalToUtfDString(
* Tcl_ExternalToUtfDStringEx --
*
* Convert a source buffer from the specified encoding into UTF-8.
- * The parameter flags controls the behavior, if any of the bytes in
+ * "flags" controls the behavior if any of the bytes in
* the source buffer are invalid or cannot be represented in utf-8.
* Possible flags values:
* target encoding. It should be composed by OR-ing the following:
@@ -2517,6 +2516,16 @@ UtfToUtfProc(
flags |= PTR2INT(clientData);
dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6);
+
+#if TCL_UTF_MAX < 4
+ /* Initialize the buffer so that some random data doesn't trick
+ * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
+ * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
+ * prior non-stateful nature, this call to memset can also be removed.
+ */
+ memset(dst, 0xff, dstLen);
+#endif
+
profile = ENCODING_PROFILE_GET(flags);
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
@@ -2564,10 +2573,10 @@ UtfToUtfProc(
} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
* Incomplete byte sequence.
- * Always check before using TclUtfToUCS4. Not doing can so
- * cause it run beyond the end of the buffer! If we happen such an
- * incomplete char its bytes are made to represent themselves
- * unless the user has explicitly asked to be told.
+ * Always check before using TclUtfToUCS4. Not doing so can cause it
+ * to run beyond the end of the buffer! If we happen on such an
+ * incomplete char, its bytes are made to represent themselves unless
+ * the user has explicitly asked to be told.
*/
if (flags & ENCODING_INPUT) {
@@ -2730,6 +2739,15 @@ Utf32ToUtfProc(
}
result = TCL_OK;
+#if TCL_UTF_MAX < 4
+ /* Initialize the buffer so that some random data doesn't trick
+ * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
+ * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
+ * prior non-stateful nature, this call to memset can also be removed.
+ */
+ memset(dst, 0xff, dstLen);
+#endif
+
/*
* Check alignment with utf-32 (4 == sizeof(UTF-32))
*/
@@ -2997,6 +3015,15 @@ Utf16ToUtfProc(
}
result = TCL_OK;
+#if TCL_UTF_MAX < 4
+ /* Initialize the buffer so that some random data doesn't trick
+ * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
+ * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
+ * prior non-stateful nature, this call to memset can also be removed.
+ */
+ memset(dst, 0xff, dstLen);
+#endif
+
/*
* Check alignment with utf-16 (2 == sizeof(UTF-16))
*/
@@ -3407,6 +3434,15 @@ TableToUtfProc(
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
+#if TCL_UTF_MAX < 4
+ /* Initialize the buffer so that some random data doesn't trick
+ * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
+ * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
+ * prior non-stateful nature, this call to memset can also be removed.
+ */
+ memset(dst, 0xff, dstLen);
+#endif
+
toUnicode = (const unsigned short *const *) dataPtr->toUnicode;
prefixBytes = dataPtr->prefixBytes;
pageZero = toUnicode[0];
@@ -3646,6 +3682,15 @@ Iso88591ToUtfProc(
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
+#if TCL_UTF_MAX < 4
+ /* Initialize the buffer so that some random data doesn't trick
+ * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
+ * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
+ * prior non-stateful nature, this call to memset can also be removed.
+ */
+ memset(dst, 0xff, dstLen);
+#endif
+
result = TCL_OK;
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
Tcl_UniChar ch = 0;
@@ -3883,6 +3928,15 @@ EscapeToUtfProc(
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
+#if TCL_UTF_MAX < 4
+ /* Initialize the buffer so that some random data doesn't trick
+ * Tcl_UniCharToUtf() into thinking it should combine surrogate pairs.
+ * Once TCL_UTF_MAX == 3 is removed and Tcl_UniCharToUtf restored to its
+ * prior non-stateful nature, this call to memset can also be removed.
+ */
+ memset(dst, 0xff, dstLen);
+#endif
+
state = PTR2INT(*statePtr);
if (flags & TCL_ENCODING_START) {
state = 0;