diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-17 19:14:42 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-17 19:14:42 (GMT) |
commit | c731ca1ffdd3e7cc90cf064ac89b2f71551958ce (patch) | |
tree | 2ab23d68db683df8b5bced7a4bd250d57e148173 | |
parent | 2c3252bc5c0a80e90ade82389f8b80faa41a6e77 (diff) | |
parent | 45796af99db14504cedf31f0336e108930482ebf (diff) | |
download | tcl-c731ca1ffdd3e7cc90cf064ac89b2f71551958ce.zip tcl-c731ca1ffdd3e7cc90cf064ac89b2f71551958ce.tar.gz tcl-c731ca1ffdd3e7cc90cf064ac89b2f71551958ce.tar.bz2 |
Fix for [885c86a9a0]: convertfrom utf8 breaks for 4 byte utf encodings
-rw-r--r-- | generic/tclEncoding.c | 16 | ||||
-rw-r--r-- | tests/encoding.test | 8 |
2 files changed, 14 insertions, 10 deletions
```diff
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 10789b1..1d3a3eb 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2417,16 +2417,14 @@ UtfToUtfProc(
                     result = TCL_CONVERT_MULTIBYTE;
                     break;
                 }
-                if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) {
-                    result = TCL_CONVERT_SYNTAX;
-                    break;
-                }
-                ch = UCHAR(*src++);
-            } else {
-                char chbuf[2];
-                chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
-                TclUtfToUCS4(chbuf, &ch);
+                if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) {
+                    result = TCL_CONVERT_SYNTAX;
+                    break;
+                }
             }
+            char chbuf[2];
+            chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
+            TclUtfToUCS4(chbuf, &ch);
             dst += Tcl_UniCharToUtf(ch, dst);
         } else {
             int low;
diff --git a/tests/encoding.test b/tests/encoding.test
index 50a0cc2..03f0273 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -768,7 +768,7 @@ test encoding-24.14 {Parse valid or invalid utf-8} {
 } 1
 test encoding-24.15 {Parse valid or invalid utf-8} -constraints deprecated -body {
     encoding convertfrom utf-8 "Z\xE0\x80"
-} -result Z\xE0\x80
+} -result Z\xE0\u20AC
 test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
     encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
 } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃' (U+004343)}
@@ -847,6 +847,12 @@ test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body {
 test encoding-24.41 {Parse invalid utf-8 with -strict} -body {
     encoding convertfrom -strict utf-8 \xED\xA0\x80\xED\xB0\x80
 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'}
+test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
+    encoding convertfrom -nocomplain utf-8 \xF0\x80\x80\x80
+} -result \xF0\u20AC\u20AC\u20AC
+test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body {
+    encoding convertfrom -nocomplain utf-8 \x80
+} -result \u20AC
 
 file delete [file join [temporaryDirectory] iso2022.txt]
 
```