diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-03-12 16:10:52 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-03-12 16:10:52 (GMT) |
| commit | eeee744ee2f72edd36c45a3ee07dbbee39f16994 (patch) | |
| tree | 59850507b809aec84dea02821c1e6a7e3a55cbce | |
| parent | 63b29d841b1918bd77d066db066e4e55eb2f1e0b (diff) | |
| download | tcl-eeee744ee2f72edd36c45a3ee07dbbee39f16994.zip tcl-eeee744ee2f72edd36c45a3ee07dbbee39f16994.tar.gz tcl-eeee744ee2f72edd36c45a3ee07dbbee39f16994.tar.bz2 | |
Minor bug-fix for utf-32: Only throw exception for codepoints > U+10FFFF if "-strict" is specified. Otherwise replace with 0xFFFD.
| -rw-r--r-- | generic/tclEncoding.c | 12 | ||||
| -rw-r--r-- | tests/encoding.test | 6 |
2 files changed, 11 insertions, 7 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 4f334bb..a471fe9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2606,19 +2606,17 @@ Utf32ToUtfProc( if ((unsigned)ch > 0x10FFFF) { ch = 0xFFFD; - if (STOPONERROR) { + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { result = TCL_CONVERT_SYNTAX; break; } } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && ((ch & ~0x7FF) == 0xD800)) { - if (STOPONERROR) { - result = TCL_CONVERT_SYNTAX; + result = TCL_CONVERT_SYNTAX; #if TCL_UTF_MAX < 4 - ch = 0; + ch = 0; #endif - break; - } + break; } /* @@ -2850,7 +2848,7 @@ Utf16ToUtfProc( if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { result = TCL_CONVERT_UNKNOWN; - src -= 2; /* Go back to before the high surrogate */ + src -= 2; /* Go back to beginning of high surrogate */ dst--; /* Also undo writing a single byte too much */ numChars--; break; diff --git a/tests/encoding.test b/tests/encoding.test index 68b5dcd..c8f34ba 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -569,6 +569,12 @@ test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -strict utf-16le \x00\xDC } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-16.24 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF" +} -result \uFFFD +test encoding-16.25 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\x01\x00\x00\x01" +} -result \uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" |
