diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-16 21:03:48 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-02-16 21:03:48 (GMT) |
commit | 2c3252bc5c0a80e90ade82389f8b80faa41a6e77 (patch) | |
tree | 81a8ebf669ce047c83ba24dbd1a5b1b609f7d23f /generic/tclEncoding.c | |
parent | d790e81db2d62271b01b55e00e219d0882f0250d (diff) | |
parent | 0563a789022a80cd7745d596028b570f0fb24cbb (diff) | |
download | tcl-2c3252bc5c0a80e90ade82389f8b80faa41a6e77.zip tcl-2c3252bc5c0a80e90ade82389f8b80faa41a6e77.tar.gz tcl-2c3252bc5c0a80e90ade82389f8b80faa41a6e77.tar.bz2 |
Fix [5e6ae6e05e]: Implement -strict correctly for cesu-8
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- | generic/tclEncoding.c | 24 |
1 files changed, 15 insertions, 9 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index af7f30a..10789b1 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -564,7 +564,7 @@ TclInitEncodingSubsystem(void)
     type.nullSize = 1;
     type.clientData = INT2PTR(ENCODING_UTF);
     Tcl_CreateEncoding(&type);
-    type.clientData = INT2PTR(TCL_ENCODING_NOCOMPLAIN);
+    type.clientData = INT2PTR(0);
     type.encodingName = "cesu-8";
     Tcl_CreateEncoding(&type);
 
@@ -2388,13 +2388,13 @@ UtfToUtfProc(
 
             *dst++ = *src++;
         } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd)
-                && (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && (!(flags & ENCODING_INPUT)
+                && (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) && (!(flags & ENCODING_INPUT)
                 || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
                 || (flags & ENCODING_FAILINDEX))) {
             /*
              * If in input mode, and -strict or -failindex is specified: This is an error.
              */
-            if (flags & ENCODING_INPUT) {
+            if ((STOPONERROR) && (flags & ENCODING_INPUT)) {
                 result = TCL_CONVERT_SYNTAX;
                 break;
             }
@@ -2430,15 +2430,21 @@ UtfToUtfProc(
             dst += Tcl_UniCharToUtf(ch, dst);
         } else {
             int low;
-            const char *saveSrc = src;
             size_t len = TclUtfToUCS4(src, &ch);
-            if ((len < 2) && (ch != 0) && (flags & ENCODING_INPUT)
-                    && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
-                result = TCL_CONVERT_SYNTAX;
-                break;
+            if (flags & ENCODING_INPUT) {
+                if ((len < 2) && (ch != 0)
+                        && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
+                    result = TCL_CONVERT_SYNTAX;
+                    break;
+                } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)
+                        && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
+                    result = TCL_CONVERT_SYNTAX;
+                    break;
+                }
             }
+            const char *saveSrc = src;
             src += len;
-            if (!(flags & ENCODING_UTF) && (ch > 0x3FF)) {
+            if (!(flags & ENCODING_UTF) && !(flags & ENCODING_INPUT) && (ch > 0x3FF)) {
                 if (ch > 0xFFFF) {
                     /* CESU-8 6-byte sequence for chars > U+FFFF */
                     ch -= 0x10000;