summary | refs | log | tree | commit | diff | stats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
author: apnadkarni <apnmbx-wits@yahoo.com> 2024-08-19 04:55:49 (GMT)
committer: apnadkarni <apnmbx-wits@yahoo.com> 2024-08-19 04:55:49 (GMT)
commit: 81081c973f15b24b6d4b03d91d0d055e60e030ed (patch)
tree: 03cb12c9ce424bed5b73cdb62efad80c2ee55558 /generic/tclEncoding.c
parent: aa3055e4f8820d2fea205376433608f7221de21f (diff)
download: tcl-81081c973f15b24b6d4b03d91d0d055e60e030ed.zip
tcl-81081c973f15b24b6d4b03d91d0d055e60e030ed.tar.gz
tcl-81081c973f15b24b6d4b03d91d0d055e60e030ed.tar.bz2
Deal with fragmented and split cesu-8 surrogates.
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- generic/tclEncoding.c | 12
1 files changed, 7 insertions, 5 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 3f73faa..ae73c77 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2557,7 +2557,11 @@ UtfToUtfProc(
}
} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
- /* Incomplete byte sequence. */
+ /*
+ * Incomplete byte sequence not because there are insufficient
+ * bytes in source buffer (have already checked that above) but
+ * because the UTF-8 sequence is truncated.
+ */
CHECK_ISOLATEDSURROGATE;
@@ -2668,7 +2672,7 @@ UtfToUtfProc(
}
} else {
/* High surrogate saved in *statePtr. Do not output anything just yet. */
- --numChars; /* XXX - TODO */
+ --numChars; /* Cancel the increment at end of loop */
continue;
}
}
@@ -2687,7 +2691,6 @@ UtfToUtfProc(
assert(!(flags & ENCODING_UTF)); /* CESU-8, Not UTF-8 */
if (!(flags & TCL_ENCODING_END)) {
/* More data coming */
- --numChars; /* XXX - TODO */
} else {
/* No more data coming */
if (PROFILE_STRICT(profile)) {
@@ -2701,10 +2704,9 @@ UtfToUtfProc(
}
if (dst < dstEnd) {
dst += Tcl_UniCharToUtf(ch, dst);
+ ++numChars;
} else {
/* No room in destination */
- assert(numChars > 0);
- --numChars; /* XXX - TODO - Since it was incremented in loop above */
result = TCL_CONVERT_NOSPACE;
}
}