diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-22 17:56:42 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-22 17:56:42 (GMT) |
commit | 8f37d5c24ef176d5df911feafb5f8159098a5cc2 (patch) | |
tree | 7f3a0c4209add4b55af842c263f110a3cb692dc3 | |
parent | f0284298add52f9e6804e1d3e53c4e67fcfbc1c3 (diff) | |
parent | c3e063b0aa8200f75aef7612fe3d63d09796ad67 (diff) | |
download | tcl-8f37d5c24ef176d5df911feafb5f8159098a5cc2.zip tcl-8f37d5c24ef176d5df911feafb5f8159098a5cc2.tar.gz tcl-8f37d5c24ef176d5df911feafb5f8159098a5cc2.tar.bz2 |
Merge 8.6
-rw-r--r-- | generic/tclEncoding.c | 48 | ||||
-rw-r--r-- | tests/encoding.test | 15 |
2 files changed, 49 insertions, 14 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d2b0efc..0490831 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2432,10 +2432,10 @@ UtfToUtfProc( if (flags & ENCODING_INPUT) { if ((len < 2) && (ch != 0) && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { - result = TCL_CONVERT_SYNTAX; - break; + goto utf8Syntax; } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF) && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { + utf8Syntax: result = TCL_CONVERT_SYNTAX; break; } @@ -2543,7 +2543,7 @@ Utf32ToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - int ch = 0; + int ch = 0, bytesLeft = srcLen % 4; flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { @@ -2555,9 +2555,9 @@ Utf32ToUtfProc( * Check alignment with utf-32 (4 == sizeof(UTF-32)) */ - if ((srcLen % 4) != 0) { + if (bytesLeft != 0) { result = TCL_CONVERT_MULTIBYTE; - srcLen &= -4; + srcLen -= bytesLeft; } /* @@ -2621,6 +2621,21 @@ Utf32ToUtfProc( /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } + if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* We have a single byte left-over at the end */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + /* destination is not full, so we really are at the end now */ + if (flags & TCL_ENCODING_STOPONERROR) { + result = TCL_CONVERT_SYNTAX; + } else { + dst += Tcl_UniCharToUtf(0xFFFD, dst); + numChars++; + src += bytesLeft; + } + } + } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2833,6 +2848,21 @@ Utf16ToUtfProc( /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } + if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* We have a single byte left-over at the end */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + /* destination is not full, so we really are at the end now */ + if (flags & TCL_ENCODING_STOPONERROR) { + result = TCL_CONVERT_SYNTAX; + } else { + dst += Tcl_UniCharToUtf(0xFFFD, dst); + numChars++; + src++; + } + } + } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -3017,8 +3047,8 @@ UtfToUcs2Proc( len = TclUtfToUniChar(src, &ch); if ((ch >= 0xD800) && (len < 3)) { if (STOPONERROR) { - result = TCL_CONVERT_UNKNOWN; - break; + result = TCL_CONVERT_UNKNOWN; + break; } src += len; src += TclUtfToUniChar(src, &ch); @@ -3028,8 +3058,8 @@ UtfToUcs2Proc( len = TclUtfToUniChar(src, &ch); if (ch > 0xFFFF) { if (STOPONERROR) { - result = TCL_CONVERT_UNKNOWN; - break; + result = TCL_CONVERT_UNKNOWN; + break; } ch = 0xFFFD; } diff --git a/tests/encoding.test b/tests/encoding.test index 83e75be..81323f4 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -534,7 +534,7 @@ test encoding-16.17 {Utf32ToUtfProc} -body { list [encoding convertfrom -strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] } -result {A 4} -test encoding-16.9 { +test encoding-16.18 { Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16 } -body { apply [list {} { @@ -553,10 +553,15 @@ test encoding-16.9 { return done } [namespace current]] } -result done - - - - +test encoding-16.19 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom utf-16 "\x41\x41\x41" +} -result \u4141\uFFFD +test encoding-16.20 {UnicodeToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body { + encoding convertfrom utf-16 "\xD8\xD8" +} -result \uD8D8 +test encoding-16.21 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom utf-32 "\x00\x00\x00\x00\x41\x41" +} -result \x00\uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" |