summary | refs | log | tree | commit | diff | stats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2023-02-16 21:03:48 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2023-02-16 21:03:48 (GMT)
commit: 2c3252bc5c0a80e90ade82389f8b80faa41a6e77 (patch)
tree: 81a8ebf669ce047c83ba24dbd1a5b1b609f7d23f /generic/tclEncoding.c
parent: d790e81db2d62271b01b55e00e219d0882f0250d (diff)
parent: 0563a789022a80cd7745d596028b570f0fb24cbb (diff)
download: tcl-2c3252bc5c0a80e90ade82389f8b80faa41a6e77.zip
tcl-2c3252bc5c0a80e90ade82389f8b80faa41a6e77.tar.gz
tcl-2c3252bc5c0a80e90ade82389f8b80faa41a6e77.tar.bz2
Fix [5e6ae6e05e]: Implement -strict correctly for cesu-8
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- generic/tclEncoding.c 24
1 files changed, 15 insertions, 9 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index af7f30a..10789b1 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -564,7 +564,7 @@ TclInitEncodingSubsystem(void)
type.nullSize = 1;
type.clientData = INT2PTR(ENCODING_UTF);
Tcl_CreateEncoding(&type);
- type.clientData = INT2PTR(TCL_ENCODING_NOCOMPLAIN);
+ type.clientData = INT2PTR(0);
type.encodingName = "cesu-8";
Tcl_CreateEncoding(&type);
@@ -2388,13 +2388,13 @@ UtfToUtfProc(
*dst++ = *src++;
} else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd)
- && (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && (!(flags & ENCODING_INPUT)
+ && (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) && (!(flags & ENCODING_INPUT)
|| ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
|| (flags & ENCODING_FAILINDEX))) {
/*
* If in input mode, and -strict or -failindex is specified: This is an error.
*/
- if (flags & ENCODING_INPUT) {
+ if ((STOPONERROR) && (flags & ENCODING_INPUT)) {
result = TCL_CONVERT_SYNTAX;
break;
}
@@ -2430,15 +2430,21 @@ UtfToUtfProc(
dst += Tcl_UniCharToUtf(ch, dst);
} else {
int low;
- const char *saveSrc = src;
size_t len = TclUtfToUCS4(src, &ch);
- if ((len < 2) && (ch != 0) && (flags & ENCODING_INPUT)
- && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
- result = TCL_CONVERT_SYNTAX;
- break;
+ if (flags & ENCODING_INPUT) {
+ if ((len < 2) && (ch != 0)
+ && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)
+ && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ }
}
+ const char *saveSrc = src;
src += len;
- if (!(flags & ENCODING_UTF) && (ch > 0x3FF)) {
+ if (!(flags & ENCODING_UTF) && !(flags & ENCODING_INPUT) && (ch > 0x3FF)) {
if (ch > 0xFFFF) {
/* CESU-8 6-byte sequence for chars > U+FFFF */
ch -= 0x10000;