summary refs log tree commit diff stats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2022-12-16 09:40:03 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2022-12-16 09:40:03 (GMT)
commit edee881fb26816194a320932f55e095b077d0e94 (patch)
tree 63424ac24ff914a15886561d6fb5b453ea79f034 /generic/tclEncoding.c
parent c78879e7075b00638b1dc25237b2e208e35a82ca (diff)
parent d762cb93317bae1c1dbf046748237ef58c132f08 (diff)
download tcl-edee881fb26816194a320932f55e095b077d0e94.zip
tcl-edee881fb26816194a320932f55e095b077d0e94.tar.gz
tcl-edee881fb26816194a320932f55e095b077d0e94.tar.bz2
Follow-up for [084ab982fe]: Use -strict to disable noncharacters. That change was made only for utf-8, but the same change should be made for utf-16 and utf-32 too.
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- generic/tclEncoding.c 13
1 files changed, 9 insertions, 4 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 169e975..78b0b9d 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2565,6 +2565,13 @@ Utf32ToUtfProc(
} else {
ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
}
+ if (ch >= 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
+ && !Tcl_UniCharIsUnicode(ch))) {
+ if (STOPONERROR) {
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ }
+ }
/*
* Special case for 1-byte utf chars for speed. Make sure we work with
@@ -2655,12 +2662,11 @@ UtfToUtf32Proc(
break;
}
len = TclUtfToUCS4(src, &ch);
- if (!Tcl_UniCharIsUnicode(ch)) {
+ if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
- ch = 0xFFFD;
}
src += len;
if (flags & TCL_ENCODING_LE) {
@@ -2858,12 +2864,11 @@ UtfToUtf16Proc(
break;
}
len = TclUtfToUCS4(src, &ch);
- if (!Tcl_UniCharIsUnicode(ch)) {
+ if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
- ch = 0xFFFD;
}
src += len;
if (flags & TCL_ENCODING_LE) {