From c60ee515ee3df4dfa977ba55edda499328b35566 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans"
Date: Mon, 21 Jun 2021 09:29:18 +0000
Subject: Fix [048dd20b4171c8da]: cesu-8 encoding fails on \u80

---
Review notes (ignored by "git am"; not part of the commit message):
 - The three added tests all carried the id encoding-15.22. They are
   numbered 15.22, 15.23 and 15.24 here so that every tcltest name is
   unique. The blob ids on the "index" lines still describe the original
   commit.
 - Line breaks and leading whitespace were reconstructed from a copy of
   this patch that had been collapsed onto a single line. Verify the
   indentation of the context lines against the target files before
   applying, or apply with --ignore-whitespace.

 generic/tclEncoding.c |  2 +-
 tests/encoding.test   | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 21c254e..9367863 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2280,7 +2280,7 @@ UtfToUtfProc(
 		break;
 	    }
 	    src += len;
-	    if (!(flags & TCL_ENCODING_UTF)) {
+	    if (!(flags & TCL_ENCODING_UTF) && (ch > 0x3FF)) {
 		if (ch > 0xFFFF) {
 		    /* CESU-8 6-byte sequence for chars > U+FFFF */
 		    ch -= 0x10000;
diff --git a/tests/encoding.test b/tests/encoding.test
index 21610a7..6fc3349 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -429,6 +429,21 @@ test encoding-15.21 {UtfToUtfProc CESU-8 noncharacter} {
     binary scan $y H* z
     list [string length $y] $z
 } {3 efbfbf}
+test encoding-15.22 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
+    set y [encoding convertto cesu-8 \x80]
+    binary scan $y H* z
+    list [string length $y] $z
+} {2 c280}
+test encoding-15.23 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
+    set y [encoding convertto cesu-8 \u100]
+    binary scan $y H* z
+    list [string length $y] $z
+} {2 c480}
+test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
+    set y [encoding convertto cesu-8 \u3FF]
+    binary scan $y H* z
+    list [string length $y] $z
+} {2 cfbf}
 
 test encoding-16.1 {Utf16ToUtfProc} -body {
     set val [encoding convertfrom utf-16 NN]
-- 
cgit v0.12