From 5dab0fa5de0367d41b4ad28b9a66c3cd1bf818ba Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 2 Jul 2023 04:55:47 +0000 Subject: Backport fix [66ffafd309]. DBCS infinite loop on invalid encoding --- generic/tclEncoding.c | 9 +++++---- tests/chanio.test | 2 +- tests/encoding.test | 21 +++++++++++++++++++++ tests/io.test | 2 +- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index f9f93fe..d0f04a7 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3400,19 +3400,19 @@ TableToUtfProc( src++; if (src >= srcEnd) { if (!(flags & TCL_ENCODING_END)) { + /* Suffix bytes expected, don't consume prefix */ src--; result = TCL_CONVERT_MULTIBYTE; break; } else if (PROFILE_STRICT(flags)) { + /* Truncation. Do not consume so error location correct */ src--; result = TCL_CONVERT_SYNTAX; break; } else if (PROFILE_REPLACE(flags)) { ch = UNICODE_REPLACE_CHAR; } else { - src--; /* See bug [bdcb5126c0] */ - result = TCL_CONVERT_MULTIBYTE; - break; + ch = (unsigned) byte; } } else { ch = toUnicode[byte][*((unsigned char *)src)]; @@ -3421,6 +3421,7 @@ TableToUtfProc( ch = pageZero[byte]; } if ((ch == 0) && (byte != 0)) { + /* Prefix+suffix pair is invalid */ if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; break; @@ -3431,7 +3432,7 @@ TableToUtfProc( if (PROFILE_REPLACE(flags)) { ch = UNICODE_REPLACE_CHAR; } else { - ch = (Tcl_UniChar)byte; + ch = (Tcl_UniChar)byte; } } diff --git a/tests/chanio.test b/tests/chanio.test index b80c926..c5d3aca 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -1104,7 +1104,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup { lappend x [chan gets $f line] $line } -cleanup { chan close $f -} -result [list 15 "123456789012301" 18 0 1 -1 ""] +} -result [list 16 "123456789012301\x82" 18 0 1 -1 ""] test chan-io-7.4 {FilterInputBytes: recover from split up character} -setup { variable x "" } -constraints {stdio fileevent} -body { diff --git a/tests/encoding.test b/tests/encoding.test index 4abc10e..8167357 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -1119,6 +1119,27 @@ test encoding-bug-6a3e2cb0f0-3 {Bug [6a3e2cb0f0] - invalid bytes in escape encod encoding convertfrom -profile replace iso2022-jp x\x1b\x7aaby } -result x\uFFFDy +test encoding-bug-66ffafd309-1-tcl8 {Bug [66ffafd309] - truncated DBCS} -body { + encoding convertfrom -profile tcl8 gb12345 x +} -result x +test encoding-bug-66ffafd309-1-strict {Bug [66ffafd309] - truncated DBCS} -body { + encoding convertfrom -profile strict gb12345 x +} -result {unexpected byte sequence starting at index 0: '\x78'} -returnCodes error +test encoding-bug-66ffafd309-1-replace {Bug [66ffafd309] - truncated DBCS} -body { + encoding convertfrom -profile replace gb12345 x +} -result \uFFFD +test encoding-bug-66ffafd309-2-tcl8 {Bug [66ffafd309] - invalid DBCS} -body { + # Not truncated but invalid + encoding convertfrom -profile tcl8 jis0208 \x78\x79 +} -result \x78\x79 +test encoding-bug-66ffafd309-2-strict {Bug [66ffafd309] - invalid DBCS} -body { + # Not truncated but invalid + encoding convertfrom -profile strict jis0208 \x78\x79 +} -result {unexpected byte sequence starting at index 1: '\x79'} -returnCodes error +test encoding-bug-66ffafd309-2-replace {Bug [66ffafd309] - invalid DBCS} -body { + # Not truncated but invalid + encoding convertfrom -profile replace jis0208 \x78\x79 +} -result \uFFFD\uFFFD # cleanup namespace delete ::tcl::test::encoding ::tcltest::cleanupTests diff --git a/tests/io.test b/tests/io.test index c0191ee..ca636ce 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1136,7 +1136,7 @@ test io-7.3 {FilterInputBytes: split up character at EOF} {testchannel} { lappend x [gets $f line] $line close $f set x -} [list 15 "123456789012301" 18 0 1 -1 ""] +} [list 16 "123456789012301\x82" 18 0 1 -1 ""] test io-7.4 {FilterInputBytes: recover from split up character} {stdio fileevent} { set f [open "|[list [interpreter] $path(cat)]" w+] fconfigure $f -encoding binary -buffering none -- cgit v0.12