From fea9c41d92f884d7d684647ac911c4ce45e9209f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 9 Dec 2019 09:59:02 +0000 Subject: Add special handling of lower surrogate to UtfToUtfProc(), so valgrind shouldn't complain in that situation. With new test-cases covering that. --- generic/tclEncoding.c | 8 ++++---- tests/encoding.test | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 69075bd..ffa23f3 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2361,16 +2361,16 @@ UtfToUtfProc( dst += Tcl_UniCharToUtf(*chPtr, dst); } else { src += TclUtfToUniChar(src, chPtr); - if ((*chPtr | 0x3FF) == 0xDBFF) { - /* A high surrogate character is detected, handle especially */ + if ((*chPtr | 0x7FF) == 0xDFFF) { + /* A surrogate character is detected, handle especially */ Tcl_UniChar low = *chPtr; size_t len = (src <= srcEnd-3) ? Tcl_UtfToUniChar(src, &low) : 0; - if ((low | 0x3FF) != 0xDFFF) { + if (((low | 0x3FF) != 0xDFFF) || !(*chPtr & 0x800)) { *dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF); *dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF); *dst++ = (char) ((*chPtr | 0x80) & 0xBF); continue; - } else if (pureNullMode == 1) { + } else if ((TCL_UTF_MAX > 3) || (pureNullMode == 1)) { int full = (((*chPtr & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000; *dst++ = (char) (((full >> 18) | 0xF0) & 0xF7); *dst++ = (char) (((full >> 12) | 0x80) & 0xBF); diff --git a/tests/encoding.test b/tests/encoding.test index b11c731..b5a44df 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -368,13 +368,23 @@ test encoding-16.1 {UnicodeToUtfProc} { list $val [format %x [scan $val %c]] } "\u4e4e 4e4e" test encoding-16.2 {UnicodeToUtfProc} -constraints fullutf -body { - set val [encoding convertfrom unicode "\xd8\xd8\xdc\xdc"] + set val [encoding convertfrom unicode "\xD8\xD8\xDC\xDC"] list $val [format %x [scan $val %c]] -} -result "\U460dc 460dc" +} -result "\U460DC 460dc" +test 
encoding-16.3 {UnicodeToUtfProc} -body { + set val [encoding convertfrom unicode "\xDC\xDC"] + list $val [format %x [scan $val %c]] +} -result "\uDCDC dcdc" test encoding-17.1 {UtfToUnicodeProc} -constraints fullutf -body { - encoding convertto unicode "\U460dc" -} -result "\xd8\xd8\xdc\xdc" + encoding convertto unicode "\U460DC" +} -result "\xD8\xD8\xDC\xDC" +test encoding-17.2 {UtfToUnicodeProc} -body { + encoding convertto unicode "\uDCDC" +} -result "\xDC\xDC" +test encoding-17.3 {UtfToUnicodeProc} -body { + encoding convertto unicode "\uD8D8" +} -result "\xD8\xD8" test encoding-18.1 {TableToUtfProc} { } {} -- cgit v0.12