From 50538911836e76d66a3526e5fe950134cca022d8 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Feb 2023 07:59:09 +0000 Subject: Try to fix [885c86a9a0]. Doesn't work completely yet. --- generic/tclEncoding.c | 8 +++----- tests/encoding.test | 8 +++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index c4db314..e178f80 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2421,12 +2421,10 @@ UtfToUtfProc( result = TCL_CONVERT_SYNTAX; break; } - ch = UCHAR(*src++); - } else { - char chbuf[2]; - chbuf[0] = UCHAR(*src++); chbuf[1] = 0; - TclUtfToUCS4(chbuf, &ch); } + char chbuf[2]; + chbuf[0] = UCHAR(*src++); chbuf[1] = 0; + Tcl_UtfToUniChar(chbuf, &ch); dst += Tcl_UniCharToUtf(ch, dst); } else { int low; diff --git a/tests/encoding.test b/tests/encoding.test index 916a84a..270c351 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -762,7 +762,7 @@ test encoding-24.14 {Parse valid or invalid utf-8} { } 1 test encoding-24.15 {Parse valid or invalid utf-8} -constraints deprecated -body { encoding convertfrom utf-8 "Z\xE0\x80" -} -result Z\xE0\x80 +} -result Z\xE0\u20AC test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\u4343\x80"] } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)} @@ -841,6 +841,12 @@ test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body { test encoding-24.41 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 \xED\xA0\x80\xED\xB0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} +test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { + encoding convertfrom -nocomplain utf-8 \xF0\x80\x80\x80 +} -result \xF0\u20AC\u20AC\u20AC€€ +test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { + encoding convertfrom -nocomplain utf-8 \x80 +} -result \u20AC€€ file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 45796af99db14504cedf31f0336e108930482ebf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Feb 2023 21:50:53 +0000 Subject: complete fix --- generic/tclEncoding.c | 10 +++++----- tests/encoding.test | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index fe78e03..1d3a3eb 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2417,14 +2417,14 @@ UtfToUtfProc( result = TCL_CONVERT_MULTIBYTE; break; } - if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) { - result = TCL_CONVERT_SYNTAX; - break; - } + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) { + result = TCL_CONVERT_SYNTAX; + break; + } } char chbuf[2]; chbuf[0] = UCHAR(*src++); chbuf[1] = 0; - Tcl_UtfToUniChar(chbuf, &ch); + TclUtfToUCS4(chbuf, &ch); dst += Tcl_UniCharToUtf(ch, dst); } else { int low; diff --git a/tests/encoding.test b/tests/encoding.test index 1b41925..03f0273 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -849,10 +849,10 @@ test encoding-24.41 {Parse invalid utf-8 with -strict} -body { } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { encoding convertfrom -nocomplain utf-8 \xF0\x80\x80\x80 -} -result \xF0\u20AC\u20AC\u20AC€€ +} -result \xF0\u20AC\u20AC\u20AC test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { encoding convertfrom -nocomplain utf-8 \x80 -} -result \u20AC€€ +} -result \u20AC file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12