From af98d8fa201c3de5ef583d20e6eacb2d556c15bd Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 19 Dec 2022 22:19:59 +0000 Subject: Remove all checks for noncharacters --- generic/tclEncoding.c | 11 +++++------ tests/encoding.test | 8 ++++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index f81b0eb..d10d9ca 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2468,13 +2468,12 @@ UtfToUtfProc( src += len; dst += Tcl_UniCharToUtf(ch, dst); ch = low; - } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && !Tcl_UniCharIsUnicode(ch) - && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && (((ch & ~0x7FF) == 0xD800))) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && (flags & TCL_ENCODING_MODIFIED) && !Tcl_UniCharIsUnicode(ch)) { + && (flags & TCL_ENCODING_MODIFIED) && ((ch & ~0x7FF) == 0xD800)) { result = TCL_CONVERT_SYNTAX; src = saveSrc; break; @@ -2566,7 +2565,7 @@ Utf32ToUtfProc( ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } if ((unsigned)ch > 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && !Tcl_UniCharIsUnicode(ch))) { + && ((ch & ~0x7FF) == 0xD800))) { if (STOPONERROR) { result = TCL_CONVERT_SYNTAX; break; @@ -2662,7 +2661,7 @@ UtfToUtf32Proc( break; } len = TclUtfToUCS4(src, &ch); - if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + if ((ch & ~0x7FF) == 0xD800) { if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; @@ -2864,7 +2863,7 @@ UtfToUtf16Proc( break; } len = TclUtfToUCS4(src, &ch); - if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + if ((ch & ~0x7FF) == 0xD800) { if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; diff --git a/tests/encoding.test b/tests/encoding.test index 19c7cca..5fd4e8c 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -703,18 +703,18 @@ test encoding-24.28 {Parse invalid utf-8 with -strict} -body { test encoding-24.29 {Parse invalid utf-8} -body { encoding convertfrom utf-8 \xEF\xBF\xBF } -result \uFFFF -test encoding-24.30 {Parse invalid utf-8 with -strict} -body { +test encoding-24.30 {Parse noncharacter with -strict} -body { encoding convertfrom -strict utf-8 \xEF\xBF\xBF -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xEF'} +} -result \uFFFF test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body { encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF } -result \uFFFF test encoding-24.32 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.33 {Try to generate invalid utf-8 with -strict} -body { +test encoding-24.33 {Try to generate noncharacter with -strict} -body { encoding convertto -strict utf-8 \uFFFF -} -returnCodes 1 -result {unexpected character at index 0: 'U+00FFFF'} +} -result \xEF\xBF\xBF test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body { encoding convertto -nocomplain utf-8 \uFFFF } -result \xEF\xBF\xBF -- cgit v0.12