summary refs log tree commit diff stats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
author apnadkarni <apnmbx-wits@yahoo.com> 2023-02-15 17:27:55 (GMT)
committer apnadkarni <apnmbx-wits@yahoo.com> 2023-02-15 17:27:55 (GMT)
commit d9046229bc814b561eb59c03e0aa3627264c07ea (patch)
tree 5489dfc4e6e19fea7dbd94dafb9378ebb97b416e /generic/tclEncoding.c
parent 6863b7b4a55fda89c621de8eba5f17753c64b000 (diff)
download tcl-d9046229bc814b561eb59c03e0aa3627264c07ea.zip
tcl-d9046229bc814b561eb59c03e0aa3627264c07ea.tar.gz
tcl-d9046229bc814b561eb59c03e0aa3627264c07ea.tar.bz2
Start on expanding encoding tests
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- generic/tclEncoding.c 41
1 files changed, 19 insertions, 22 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 7886910..8cd970f 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2409,32 +2409,29 @@ UtfToUtfProc(
*/
*dst++ = *src++;
- } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd)
- && (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && (!(flags & ENCODING_INPUT)
- || PROFILE_STRICT(profile))) {
- /*
- * \xC0\x80 and either strict profile or target is "real" UTF-8
- * - Strict profile - error
- * - Non-strict, real UTF-8 - output \x00
- */
- if (flags & ENCODING_INPUT) {
- /*
- * TODO - should above check not be against STRICT?
- * That would probably break a convertto command that goes
- * from the internal UTF8 to the real UTF8. On the other
- * hand this means, a strict UTF8->UTF8 transform is not
- * possible using this function.
- */
+ }
+ else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) &&
+ (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) &&
+ (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) ||
+ PROFILE_REPLACE(profile))) {
+ /* Special sequence \xC0\x80 */
+ if (PROFILE_STRICT(profile)) {
result = TCL_CONVERT_SYNTAX;
break;
}
- /*
- * Convert 0xC080 to real nulls when we are in output mode,
- * irrespective of the profile.
- */
- *dst++ = 0;
- src += 2;
+ if (PROFILE_REPLACE(profile)) {
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ src += 1; /* C0, 80 handled in next loop iteration
+ since dst limit has to be checked */
+ } else {
+ /*
+ * Convert 0xC080 to real nulls when we are in output mode,
+ * irrespective of the profile.
+ */
+ *dst++ = 0;
+ src += 2;
+ }
}
else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*