diff options
author | apnadkarni <apnmbx-wits@yahoo.com> | 2023-02-17 11:08:17 (GMT) |
---|---|---|
committer | apnadkarni <apnmbx-wits@yahoo.com> | 2023-02-17 11:08:17 (GMT) |
commit | 05d3910c96aebadc2e6618091738956ac6a1469e (patch) | |
tree | ce26d61aa708ab5bd9eb5d82d55397df74ac1b68 /generic/tclEncoding.c | |
parent | bd084c2fc97ffe2e19f0f44e23f441b89c139e9b (diff) | |
parent | 2c3252bc5c0a80e90ade82389f8b80faa41a6e77 (diff) | |
download | tcl-05d3910c96aebadc2e6618091738956ac6a1469e.zip tcl-05d3910c96aebadc2e6618091738956ac6a1469e.tar.gz tcl-05d3910c96aebadc2e6618091738956ac6a1469e.tar.bz2 |
Merge 8.7
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- | generic/tclEncoding.c | 37 |
1 file changed, 20 insertions, 17 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 470f8f3..a11e696 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2412,11 +2412,11 @@ UtfToUtfProc( *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) && - (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && + (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) && (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) || PROFILE_REPLACE(profile))) { /* Special sequence \xC0\x80 */ - if (flags & ENCODING_INPUT) { + if ((PROFILE_STRICT(profile) || PROFILE_REPLACE(profile)) && (flags & ENCODING_INPUT)) { if (PROFILE_REPLACE(profile)) { dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); src += 2; @@ -2485,26 +2485,28 @@ UtfToUtfProc( } else { int low; - const char *saveSrc = src; + int isInvalid = 0; size_t len = TclUtfToUCS4(src, &ch); - - /* - * Valid single char encodings were already handled earlier. - * So len==1 means an invalid byte that is magically transformed - * to a code point unless it resulted from the special - * \xC0\x80 sequence. 
Tests io-75.* - */ - if ((len < 2) && (ch != 0) && (flags & ENCODING_INPUT)) { - if (PROFILE_STRICT(profile)) { - result = TCL_CONVERT_SYNTAX; - break; - } else if (PROFILE_REPLACE(profile)) { - ch = UNICODE_REPLACE_CHAR; + if (flags & ENCODING_INPUT) { + if ((len < 2) && (ch != 0)) { + isInvalid = 1; + } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)) { + isInvalid = 1; + } + if (isInvalid) { + if (PROFILE_STRICT(profile)) { + result = TCL_CONVERT_SYNTAX; + break; + } + else if (PROFILE_REPLACE(profile)) { + ch = UNICODE_REPLACE_CHAR; + } } } + const char *saveSrc = src; src += len; - if (!(flags & ENCODING_UTF) && (ch > 0x3FF)) { + if (!(flags & ENCODING_UTF) && !(flags & ENCODING_INPUT) && (ch > 0x3FF)) { if (ch > 0xFFFF) { /* CESU-8 6-byte sequence for chars > U+FFFF */ ch -= 0x10000; @@ -2670,6 +2672,7 @@ Utf32ToUtfProc( if ((unsigned)ch > 0x10FFFF || SURROGATE(ch)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; + ch = 0; break; } if (PROFILE_REPLACE(flags)) { |