summary refs log tree commit diff stats
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2023-03-20 11:29:49 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2023-03-20 11:29:49 (GMT)
commit 5d63902332fb63571e32e9124190aa3f9c98526b (patch)
tree a625d7daab79f2836863888c867636187da0ba3a
parent 75664c655d15e9308cf62fcdaee3bed1c4545c63 (diff)
parent 2d9a47cff10b0ed3a76254dbeb03b5ec987170f4 (diff)
download tcl-5d63902332fb63571e32e9124190aa3f9c98526b.zip
tcl-5d63902332fb63571e32e9124190aa3f9c98526b.tar.gz
tcl-5d63902332fb63571e32e9124190aa3f9c98526b.tar.bz2
Fix [d7fd37ebd9]: handling leftover prefix in table encoding
-rw-r--r-- generic/tclEncoding.c 31
-rw-r--r-- tests/chanio.test 2
-rw-r--r-- tests/io.test 2
3 files changed, 20 insertions, 15 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 0478519..35b74c7 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -3413,18 +3413,23 @@ TableToUtfProc(
if (prefixBytes[byte]) {
src++;
if (src >= srcEnd) {
- /*
- * TODO - this is broken. For consistency with other
- * decoders, an error should be raised only if strict.
- * However, doing that check cause a whole bunch of test
- * failures. Need to verify if those tests are in fact
- * correct.
- */
- src--;
- result = TCL_CONVERT_MULTIBYTE;
- break;
+ if (!(flags & TCL_ENCODING_END)) {
+ src--;
+ result = TCL_CONVERT_MULTIBYTE;
+ break;
+ } else if (PROFILE_STRICT(flags)) {
+ src--;
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ } else if (PROFILE_REPLACE(flags)) {
+ ch = UNICODE_REPLACE_CHAR;
+ } else {
+ numChars++; /* Silently consume */
+ break;
+ }
+ } else {
+ ch = toUnicode[byte][*((unsigned char *)src)];
}
- ch = toUnicode[byte][*((unsigned char *)src)];
} else {
ch = pageZero[byte];
}
@@ -3447,7 +3452,7 @@ TableToUtfProc(
* Special case for 1-byte utf chars for speed.
*/
- if (ch && ch < 0x80) {
+ if ((unsigned)ch - 1 < 0x7F) {
*dst++ = (char) ch;
} else {
dst += Tcl_UniCharToUtf(ch, dst);
@@ -3648,7 +3653,7 @@ Iso88591ToUtfProc(
* Special case for 1-byte utf chars for speed.
*/
- if (ch && ch < 0x80) {
+ if ((unsigned)ch - 1 < 0x7F) {
*dst++ = (char) ch;
} else {
dst += Tcl_UniCharToUtf(ch, dst);
diff --git a/tests/chanio.test b/tests/chanio.test
index d2008e6..b73e681 100644
--- a/tests/chanio.test
+++ b/tests/chanio.test
@@ -1104,7 +1104,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup {
lappend x [chan gets $f line] $line
} -cleanup {
chan close $f
-} -result [list 15 "123456789012301" 18 0 1 -1 ""]
+} -result [list 15 "123456789012301" 17 1 1 -1 ""]
test chan-io-7.4 {FilterInputBytes: recover from split up character} -setup {
variable x ""
} -constraints {stdio fileevent} -body {
diff --git a/tests/io.test b/tests/io.test
index c3c0cdd..eb4abbd 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -1136,7 +1136,7 @@ test io-7.3 {FilterInputBytes: split up character at EOF} {testchannel} {
lappend x [gets $f line] $line
close $f
set x
-} [list 15 "123456789012301" 18 0 1 -1 ""]
+} [list 15 "123456789012301" 17 1 1 -1 ""]
test io-7.4 {FilterInputBytes: recover from split up character} {stdio fileevent} {
set f [open "|[list [interpreter] $path(cat)]" w+]
fconfigure $f -encoding binary -buffering none