summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclEncoding.c | 13
-rw-r--r-- tests/encoding.test | 10
2 files changed, 17 insertions, 6 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index ef32d29..ecf01da 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2505,6 +2505,13 @@ Utf32ToUtfProc(
} else {
ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
}
+ if (ch >= 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
+ && !Tcl_UniCharIsUnicode(ch))) {
+ if (STOPONERROR) {
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ }
+ }
/*
* Special case for 1-byte utf chars for speed. Make sure we work with
@@ -2595,12 +2602,11 @@ UtfToUtf32Proc(
break;
}
len = TclUtfToUCS4(src, &ch);
- if (!Tcl_UniCharIsUnicode(ch)) {
+ if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
- ch = 0xFFFD;
}
src += len;
if (flags & TCL_ENCODING_LE) {
@@ -2798,12 +2804,11 @@ UtfToUtf16Proc(
break;
}
len = TclUtfToUCS4(src, &ch);
- if (!Tcl_UniCharIsUnicode(ch)) {
+ if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
- ch = 0xFFFD;
}
src += len;
if (flags & TCL_ENCODING_LE) {
diff --git a/tests/encoding.test b/tests/encoding.test
index 89209d0..24d9c82 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -491,16 +491,22 @@ test encoding-17.2 {UtfToUcs2Proc} -body {
} -result "\uFFFD"
test encoding-17.3 {UtfToUtf16Proc} -body {
encoding convertto -nocomplain utf-16be "\uDCDC"
-} -result "\xFF\xFD"
+} -result "\xDC\xDC"
test encoding-17.4 {UtfToUtf16Proc} -body {
encoding convertto -nocomplain utf-16le "\uD8D8"
-} -result "\xFD\xFF"
+} -result "\xD8\xD8"
test encoding-17.5 {UtfToUtf16Proc} -body {
encoding convertto utf-32le "\U460DC"
} -result "\xDC\x60\x04\x00"
test encoding-17.6 {UtfToUtf16Proc} -body {
encoding convertto utf-32be "\U460DC"
} -result "\x00\x04\x60\xDC"
+test encoding-17.7 {UtfToUtf16Proc} -body {
+ encoding convertto -strict utf-16be "\uDCDC"
+} -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'}
+test encoding-17.8 {UtfToUtf16Proc} -body {
+ encoding convertto -strict utf-16le "\uD8D8"
+} -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'}
test encoding-18.1 {TableToUtfProc} {
} {}