diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-03-20 11:29:49 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-03-20 11:29:49 (GMT) |
commit | 5d63902332fb63571e32e9124190aa3f9c98526b (patch) | |
tree | a625d7daab79f2836863888c867636187da0ba3a /generic | |
parent | 75664c655d15e9308cf62fcdaee3bed1c4545c63 (diff) | |
parent | 2d9a47cff10b0ed3a76254dbeb03b5ec987170f4 (diff) | |
download | tcl-5d63902332fb63571e32e9124190aa3f9c98526b.zip tcl-5d63902332fb63571e32e9124190aa3f9c98526b.tar.gz tcl-5d63902332fb63571e32e9124190aa3f9c98526b.tar.bz2 |
Fix [d7fd37ebd9]: handling leftover prefix in table encoding
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclEncoding.c | 31 |
1 file changed, 18 insertions, 13 deletions
```diff
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 0478519..35b74c7 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -3413,18 +3413,23 @@ TableToUtfProc(
         if (prefixBytes[byte]) {
             src++;
             if (src >= srcEnd) {
-                /*
-                 * TODO - this is broken. For consistency with other
-                 * decoders, an error should be raised only if strict.
-                 * However, doing that check cause a whole bunch of test
-                 * failures. Need to verify if those tests are in fact
-                 * correct.
-                 */
-                src--;
-                result = TCL_CONVERT_MULTIBYTE;
-                break;
+                if (!(flags & TCL_ENCODING_END)) {
+                    src--;
+                    result = TCL_CONVERT_MULTIBYTE;
+                    break;
+                } else if (PROFILE_STRICT(flags)) {
+                    src--;
+                    result = TCL_CONVERT_SYNTAX;
+                    break;
+                } else if (PROFILE_REPLACE(flags)) {
+                    ch = UNICODE_REPLACE_CHAR;
+                } else {
+                    numChars++; /* Silently consume */
+                    break;
+                }
+            } else {
+                ch = toUnicode[byte][*((unsigned char *)src)];
             }
-            ch = toUnicode[byte][*((unsigned char *)src)];
         } else {
             ch = pageZero[byte];
         }
@@ -3447,7 +3452,7 @@ TableToUtfProc(
          * Special case for 1-byte utf chars for speed.
          */
 
-        if (ch && ch < 0x80) {
+        if ((unsigned)ch - 1 < 0x7F) {
             *dst++ = (char) ch;
         } else {
             dst += Tcl_UniCharToUtf(ch, dst);
@@ -3648,7 +3653,7 @@ Iso88591ToUtfProc(
          * Special case for 1-byte utf chars for speed.
          */
 
-        if (ch && ch < 0x80) {
+        if ((unsigned)ch - 1 < 0x7F) {
             *dst++ = (char) ch;
         } else {
             dst += Tcl_UniCharToUtf(ch, dst);
```