diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-03-19 21:51:50 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-03-19 21:51:50 (GMT) |
commit | 2d9a47cff10b0ed3a76254dbeb03b5ec987170f4 (patch) | |
tree | 931f7aad332c386abe2317b3b8125909a5e0b2a3 /generic/tclEncoding.c | |
parent | 800d78f04d79def339bde5edb9042b6288524460 (diff) | |
download | tcl-2d9a47cff10b0ed3a76254dbeb03b5ec987170f4.zip tcl-2d9a47cff10b0ed3a76254dbeb03b5ec987170f4.tar.gz tcl-2d9a47cff10b0ed3a76254dbeb03b5ec987170f4.tar.bz2 |
Possible fix for [d7fd37ebd9]: handling leftover prefix in table encoding
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- | generic/tclEncoding.c | 30 |
1 files changed, 17 insertions, 13 deletions
```diff
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 0478519..69b7b6c 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -3413,18 +3413,22 @@ TableToUtfProc(
         if (prefixBytes[byte]) {
             src++;
             if (src >= srcEnd) {
-                /*
-                 * TODO - this is broken. For consistency with other
-                 * decoders, an error should be raised only if strict.
-                 * However, doing that check cause a whole bunch of test
-                 * failures. Need to verify if those tests are in fact
-                 * correct.
-                 */
-                src--;
-                result = TCL_CONVERT_MULTIBYTE;
-                break;
+                if (!(flags & TCL_ENCODING_END)) {
+                    src--;
+                    result = TCL_CONVERT_MULTIBYTE;
+                    break;
+                } else if (PROFILE_STRICT(flags)) {
+                    src--;
+                    result = TCL_CONVERT_SYNTAX;
+                    break;
+                } else if (PROFILE_REPLACE(flags)) {
+                    ch = UNICODE_REPLACE_CHAR;
+                } else {
+                    ch = (Tcl_UniChar)byte;
+                }
+            } else {
+                ch = toUnicode[byte][*((unsigned char *)src)];
             }
-            ch = toUnicode[byte][*((unsigned char *)src)];
         } else {
             ch = pageZero[byte];
         }
@@ -3447,7 +3451,7 @@ TableToUtfProc(
          * Special case for 1-byte utf chars for speed.
          */
 
-        if (ch && ch < 0x80) {
+        if ((unsigned)ch - 1 < 0x7F) {
             *dst++ = (char) ch;
         } else {
             dst += Tcl_UniCharToUtf(ch, dst);
@@ -3648,7 +3652,7 @@ Iso88591ToUtfProc(
          * Special case for 1-byte utf chars for speed.
          */
 
-        if (ch && ch < 0x80) {
+        if ((unsigned)ch - 1 < 0x7F) {
             *dst++ = (char) ch;
         } else {
             dst += Tcl_UniCharToUtf(ch, dst);
```