From fd98096770210ed4e5e1ddbf93c7d3860716efcb Mon Sep 17 00:00:00 2001
From: "jan.nijtmans"
Date: Fri, 16 Dec 2022 10:23:56 +0000
Subject: Addendum to [https://core.tcl-lang.org/tips/doc/trunk/tip/601.md|TIP #601]. Add check for characters > U+10FFFF in utf-32. Since utf-8 cannot handle that, either replace it with the replacement character (-nocomplain) or throw an exception (-strict).

---
 generic/tclEncoding.c | 2 +-
 tests/encoding.test   | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 78b0b9d..f81b0eb 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2565,7 +2565,7 @@ Utf32ToUtfProc(
             } else {
                 ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
             }
-            if (ch >= 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
+            if ((unsigned)ch > 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
                     && !Tcl_UniCharIsUnicode(ch))) {
                 if (STOPONERROR) {
                     result = TCL_CONVERT_SYNTAX;
diff --git a/tests/encoding.test b/tests/encoding.test
index db70744..19c7cca 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -506,6 +506,12 @@ test encoding-17.7 {UtfToUtf16Proc} -body {
 test encoding-17.8 {UtfToUtf16Proc} -body {
     encoding convertto -strict utf-16le "\uD8D8"
 } -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'}
+test encoding-17.9 {Utf32ToUtfProc} -body {
+    encoding convertfrom -strict utf-32 "\xFF\xFF\xFF\xFF"
+} -returnCodes error -result {unexpected byte sequence starting at index 0: '\xFF'}
+test encoding-17.10 {Utf32ToUtfProc} -body {
+    encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF"
+} -result \uFFFD
 
 test encoding-18.1 {TableToUtfProc} {
 } {}
-- 
cgit v0.12