From f06c5e7af1c85806bcbce3202000670b90ab4528 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 15 Feb 2023 20:26:10 +0000 Subject: Fix for [33ab6d08eb]: Inconsistent behavior with encoding convertfrom -failindex --- generic/tclEncoding.c | 2 +- tests/encoding.test | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index c4db314..af7f30a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2433,7 +2433,7 @@ UtfToUtfProc( const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); if ((len < 2) && (ch != 0) && (flags & ENCODING_INPUT) - && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { result = TCL_CONVERT_SYNTAX; break; } diff --git a/tests/encoding.test b/tests/encoding.test index 916a84a..6f1a760 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -617,9 +617,12 @@ test encoding-19.3 {TableFromUtfProc} -body { test encoding-19.4 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx ascii AÁ] [set idx] } -result {A 1} -test encoding-19.4 {TableFromUtfProc} -body { +test encoding-19.5 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx -strict ascii AÁ] [set idx] } -result {A 1} +test encoding-19.6 {TableFromUtfProc} -body { + list [encoding convertfrom -failindex idx -strict ascii AÁB] [set idx] +} -result {A 1} test encoding-20.1 {TableFreefProc} { } {} -- cgit v0.12 From 0563a789022a80cd7745d596028b570f0fb24cbb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Feb 2023 16:59:10 +0000 Subject: Fix [5e6ae6e05e]: Implement -strict correctly for cesu-8 --- generic/tclEncoding.c | 24 +++++++++++++++--------- tests/encoding.test | 13 ++++++++----- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index c4db314..73cbc5c 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -564,7 +564,7 @@ TclInitEncodingSubsystem(void) type.nullSize = 1; type.clientData = INT2PTR(ENCODING_UTF); Tcl_CreateEncoding(&type); - type.clientData = INT2PTR(TCL_ENCODING_NOCOMPLAIN); + type.clientData = INT2PTR(0); type.encodingName = "cesu-8"; Tcl_CreateEncoding(&type); @@ -2388,13 +2388,13 @@ UtfToUtfProc( *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) - && (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && (!(flags & ENCODING_INPUT) + && (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) && (!(flags & ENCODING_INPUT) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { /* * If in input mode, and -strict or -failindex is specified: This is an error. */ - if (flags & ENCODING_INPUT) { + if ((STOPONERROR) && (flags & ENCODING_INPUT)) { result = TCL_CONVERT_SYNTAX; break; } @@ -2430,15 +2430,21 @@ UtfToUtfProc( dst += Tcl_UniCharToUtf(ch, dst); } else { int low; - const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && (flags & ENCODING_INPUT) - && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { - result = TCL_CONVERT_SYNTAX; - break; + if (flags & ENCODING_INPUT) { + if ((len < 2) && (ch != 0) + && ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + result = TCL_CONVERT_SYNTAX; + break; + } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF) + && ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + result = TCL_CONVERT_SYNTAX; + break; + } } + const char *saveSrc = src; src += len; - if (!(flags & ENCODING_UTF) && (ch > 0x3FF)) { + if (!(flags & ENCODING_UTF) && !(flags & ENCODING_INPUT) && (ch > 0x3FF)) { if (ch > 0xFFFF) { /* CESU-8 6-byte sequence for chars > U+FFFF */ ch -= 0x10000; diff --git a/tests/encoding.test b/tests/encoding.test index 916a84a..34dfafb 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -459,17 +459,20 @@ test encoding-15.26 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \xC0\x80 } \x00 test encoding-15.27 {UtfToUtfProc -strict CESU-8} { - encoding convertfrom -strict cesu-8 \xC0\x80 + encoding convertfrom -strict cesu-8 \x00 } \x00 -test encoding-15.28 {UtfToUtfProc -strict CESU-8} { +test encoding-15.28 {UtfToUtfProc -strict CESU-8} -body { encoding convertfrom -strict cesu-8 \xC0\x80 -} \x00 +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-15.29 {UtfToUtfProc CESU-8} { encoding convertto cesu-8 \x00 -} \xC0\x80 +} \x00 test encoding-15.30 {UtfToUtfProc -strict CESU-8} { encoding convertto -strict cesu-8 \x00 -} \xC0\x80 +} \x00 +test encoding-15.31 {UtfToUtfProc -strict CESU-8 (bytes F0-F4 are invalid)} -body { + encoding convertfrom -strict cesu-8 \xF1\x86\x83\x9C +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF1'} test encoding-16.1 {Utf16ToUtfProc} -body { set val [encoding convertfrom utf-16 NN] -- cgit v0.12