diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-03-20 11:29:49 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2023-03-20 11:29:49 (GMT) |
commit | 5d63902332fb63571e32e9124190aa3f9c98526b (patch) | |
tree | a625d7daab79f2836863888c867636187da0ba3a | |
parent | 75664c655d15e9308cf62fcdaee3bed1c4545c63 (diff) | |
parent | 2d9a47cff10b0ed3a76254dbeb03b5ec987170f4 (diff) | |
download | tcl-5d63902332fb63571e32e9124190aa3f9c98526b.zip tcl-5d63902332fb63571e32e9124190aa3f9c98526b.tar.gz tcl-5d63902332fb63571e32e9124190aa3f9c98526b.tar.bz2 |
Fix [d7fd37ebd9]: handling leftover prefix in table encoding
-rw-r--r-- | generic/tclEncoding.c | 31 | ||||
-rw-r--r-- | tests/chanio.test | 2 | ||||
-rw-r--r-- | tests/io.test | 2 |
3 files changed, 20 insertions, 15 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 0478519..35b74c7 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -3413,18 +3413,23 @@ TableToUtfProc(
         if (prefixBytes[byte]) {
             src++;
             if (src >= srcEnd) {
-                /*
-                 * TODO - this is broken. For consistency with other
-                 * decoders, an error should be raised only if strict.
-                 * However, doing that check cause a whole bunch of test
-                 * failures. Need to verify if those tests are in fact
-                 * correct.
-                 */
-                src--;
-                result = TCL_CONVERT_MULTIBYTE;
-                break;
+                if (!(flags & TCL_ENCODING_END)) {
+                    src--;
+                    result = TCL_CONVERT_MULTIBYTE;
+                    break;
+                } else if (PROFILE_STRICT(flags)) {
+                    src--;
+                    result = TCL_CONVERT_SYNTAX;
+                    break;
+                } else if (PROFILE_REPLACE(flags)) {
+                    ch = UNICODE_REPLACE_CHAR;
+                } else {
+                    numChars++; /* Silently consume */
+                    break;
+                }
+            } else {
+                ch = toUnicode[byte][*((unsigned char *)src)];
             }
-            ch = toUnicode[byte][*((unsigned char *)src)];
         } else {
             ch = pageZero[byte];
         }
@@ -3447,7 +3452,7 @@ TableToUtfProc(
          * Special case for 1-byte utf chars for speed.
          */
 
-        if (ch && ch < 0x80) {
+        if ((unsigned)ch - 1 < 0x7F) {
             *dst++ = (char) ch;
         } else {
             dst += Tcl_UniCharToUtf(ch, dst);
@@ -3648,7 +3653,7 @@ Iso88591ToUtfProc(
          * Special case for 1-byte utf chars for speed.
          */
 
-        if (ch && ch < 0x80) {
+        if ((unsigned)ch - 1 < 0x7F) {
             *dst++ = (char) ch;
         } else {
             dst += Tcl_UniCharToUtf(ch, dst);
diff --git a/tests/chanio.test b/tests/chanio.test
index d2008e6..b73e681 100644
--- a/tests/chanio.test
+++ b/tests/chanio.test
@@ -1104,7 +1104,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup {
     lappend x [chan gets $f line] $line
 } -cleanup {
     chan close $f
-} -result [list 15 "123456789012301" 18 0 1 -1 ""]
+} -result [list 15 "123456789012301" 17 1 1 -1 ""]
 test chan-io-7.4 {FilterInputBytes: recover from split up character} -setup {
     variable x ""
 } -constraints {stdio fileevent} -body {
diff --git a/tests/io.test b/tests/io.test
index c3c0cdd..eb4abbd 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -1136,7 +1136,7 @@ test io-7.3 {FilterInputBytes: split up character at EOF} {testchannel} {
     lappend x [gets $f line] $line
     close $f
     set x
-} [list 15 "123456789012301" 18 0 1 -1 ""]
+} [list 15 "123456789012301" 17 1 1 -1 ""]
 test io-7.4 {FilterInputBytes: recover from split up character} {stdio fileevent} {
     set f [open "|[list [interpreter] $path(cat)]" w+]
     fconfigure $f -encoding binary -buffering none