diff options
-rw-r--r-- | generic/tclEncoding.c | 13 | ||||
-rw-r--r-- | tests/encoding.test | 10 |
2 files changed, 17 insertions, 6 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index ef32d29..ecf01da 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2505,6 +2505,13 @@ Utf32ToUtfProc(
     } else {
         ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
     }
+    if (ch >= 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
+            && !Tcl_UniCharIsUnicode(ch))) {
+        if (STOPONERROR) {
+            result = TCL_CONVERT_SYNTAX;
+            break;
+        }
+    }
 
     /*
      * Special case for 1-byte utf chars for speed. Make sure we work with
@@ -2595,12 +2602,11 @@ UtfToUtf32Proc(
         break;
     }
     len = TclUtfToUCS4(src, &ch);
-    if (!Tcl_UniCharIsUnicode(ch)) {
+    if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
         if (STOPONERROR) {
             result = TCL_CONVERT_UNKNOWN;
             break;
         }
-        ch = 0xFFFD;
     }
     src += len;
     if (flags & TCL_ENCODING_LE) {
@@ -2798,12 +2804,11 @@ UtfToUtf16Proc(
         break;
     }
     len = TclUtfToUCS4(src, &ch);
-    if (!Tcl_UniCharIsUnicode(ch)) {
+    if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
         if (STOPONERROR) {
             result = TCL_CONVERT_UNKNOWN;
             break;
         }
-        ch = 0xFFFD;
     }
     src += len;
     if (flags & TCL_ENCODING_LE) {
diff --git a/tests/encoding.test b/tests/encoding.test
index 89209d0..24d9c82 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -491,16 +491,22 @@ test encoding-17.2 {UtfToUcs2Proc} -body {
 } -result "\uFFFD"
 test encoding-17.3 {UtfToUtf16Proc} -body {
     encoding convertto -nocomplain utf-16be "\uDCDC"
-} -result "\xFF\xFD"
+} -result "\xDC\xDC"
 test encoding-17.4 {UtfToUtf16Proc} -body {
     encoding convertto -nocomplain utf-16le "\uD8D8"
-} -result "\xFD\xFF"
+} -result "\xD8\xD8"
 test encoding-17.5 {UtfToUtf16Proc} -body {
     encoding convertto utf-32le "\U460DC"
 } -result "\xDC\x60\x04\x00"
 test encoding-17.6 {UtfToUtf16Proc} -body {
     encoding convertto utf-32be "\U460DC"
 } -result "\x00\x04\x60\xDC"
+test encoding-17.7 {UtfToUtf16Proc} -body {
+    encoding convertto -strict utf-16be "\uDCDC"
+} -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'}
+test encoding-17.8 {UtfToUtf16Proc} -body {
+    encoding convertto -strict utf-16le "\uD8D8"
+} -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'}
 
 test encoding-18.1 {TableToUtfProc} {
 } {}