summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2023-03-12 16:10:52 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2023-03-12 16:10:52 (GMT)
commiteeee744ee2f72edd36c45a3ee07dbbee39f16994 (patch)
tree59850507b809aec84dea02821c1e6a7e3a55cbce
parent63b29d841b1918bd77d066db066e4e55eb2f1e0b (diff)
downloadtcl-eeee744ee2f72edd36c45a3ee07dbbee39f16994.zip
tcl-eeee744ee2f72edd36c45a3ee07dbbee39f16994.tar.gz
tcl-eeee744ee2f72edd36c45a3ee07dbbee39f16994.tar.bz2
Minor bug-fix for utf-32: Only throw an exception for codepoints > U+10FFFF if "-strict" is specified. Otherwise replace them with 0xFFFD
-rw-r--r--generic/tclEncoding.c12
-rw-r--r--tests/encoding.test6
2 files changed, 11 insertions, 7 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 4f334bb..a471fe9 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2606,19 +2606,17 @@ Utf32ToUtfProc(
if ((unsigned)ch > 0x10FFFF) {
ch = 0xFFFD;
- if (STOPONERROR) {
+ if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) {
result = TCL_CONVERT_SYNTAX;
break;
}
} else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
&& ((ch & ~0x7FF) == 0xD800)) {
- if (STOPONERROR) {
- result = TCL_CONVERT_SYNTAX;
+ result = TCL_CONVERT_SYNTAX;
#if TCL_UTF_MAX < 4
- ch = 0;
+ ch = 0;
#endif
- break;
- }
+ break;
}
/*
@@ -2850,7 +2848,7 @@ Utf16ToUtfProc(
if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) {
if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) {
result = TCL_CONVERT_UNKNOWN;
- src -= 2; /* Go back to before the high surrogate */
+ src -= 2; /* Go back to beginning of high surrogate */
dst--; /* Also undo writing a single byte too much */
numChars--;
break;
diff --git a/tests/encoding.test b/tests/encoding.test
index 68b5dcd..c8f34ba 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -569,6 +569,12 @@ test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body {
test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body {
encoding convertfrom -strict utf-16le \x00\xDC
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'}
+test encoding-16.24 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF"
+} -result \uFFFD
+test encoding-16.25 {Utf32ToUtfProc} -body {
+ encoding convertfrom utf-32 "\x01\x00\x00\x01"
+} -result \uFFFD
test encoding-17.1 {UtfToUtf16Proc} -body {
encoding convertto utf-16 "\U460DC"