From 975d478bfaf46abfe1b34bdbd82dd0dc9556d864 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 1 Apr 2021 09:10:13 +0000 Subject: More bugfixes (and testcases showing this) --- generic/tclCmdAH.c | 2 +- generic/tclEncoding.c | 17 +++++++++++------ tests/encoding.test | 5 ++++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 0c0a4a4..1dfabd2 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -678,7 +678,7 @@ EncodingConverttoObjCmd( int ucs4; TclUtfToUCS4(&stringPtr[result], &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" - TCL_LL_MODIFIER "u: '%1s' (U+%06X)", (long long)pos, &stringPtr[result], ucs4)); + TCL_LL_MODIFIER "u: '%c' (U+%06X)", (long long)pos, ucs4, ucs4)); Tcl_DStringFree(&ds); return TCL_ERROR; } diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d28fc8c..6cf0d76 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2303,18 +2303,23 @@ UtfToUtfProc( * unless the user has explicitly asked to be told. */ - if (flags & TCL_ENCODING_STOPONERROR) { - result = TCL_CONVERT_MULTIBYTE; - break; + if (flags & TCL_ENCODING_MODIFIED) { + if (flags & TCL_ENCODING_STOPONERROR) { + result = TCL_CONVERT_MULTIBYTE; + break; + } + ch = UCHAR(*src++); + } else { + char chbuf[2]; + chbuf[0] = UCHAR(*src++); chbuf[1] = 0; + TclUtfToUCS4(chbuf, &ch); } - ch = UCHAR(*src); - src += 1; dst += Tcl_UniCharToUtf(ch, dst); } else { int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR)) { + if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR) && (flags & TCL_ENCODING_MODIFIED)) { result = TCL_CONVERT_SYNTAX; break; } diff --git a/tests/encoding.test b/tests/encoding.test index 45b5f49..3b3f42c 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -671,8 +671,11 @@ test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring - } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)} test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\xE0\x80"] -stoponerror -} -returnCodes 1 -match glob -result {unexpected character at index 1: '*' (U+0000E0)} +} -result "Z\xC3\xA0\xE2\x82\xAC" test encoding-24.19 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"] -stoponerror +} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" +test encoding-24.20 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 "ZX\uD800" -stoponerror } -returnCodes 1 -match glob -result "unexpected character at index 2: '\uD800' (U+00D800)" -- cgit v0.12