From 901525301c284507e65f39f5c68785ab9ec1eb16 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 13 Apr 2020 00:09:43 +0000 Subject: added test case covering [c61818e4c9] - string trim for not valid utf-8 sequence (mistakenly considers NTS-zero char as a continuation of utf-8 pair) --- tests/string.test | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/string.test b/tests/string.test index e1ae63a..f6eaaf0 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1459,6 +1459,23 @@ test string-20.4 {string trimright} { test string-20.5 {string trimright} { string trimright "" } {} +test string-20.6 {string trim on not valid utf-8 sequence (consider NTS as continuation char), bug [c61818e4c9]} -setup { + interp alias {} bytes {} encoding convertfrom identity +} -body { + set result {} + set a [bytes \xc0\x80\x88] + set b foo$a + set m [list \u0000 U \x88 V [bytes \x88] W] + lappend result [string map $m $b] + lappend result [string map $m [string trimright $b x]] + lappend result [string map $m [string trimright $b \u0000]] + lappend result [string map $m [string trimleft $b fox]] + lappend result [string map $m [string trimleft $b fo\u0000]] + lappend result [string map $m [string trim $b fox]] + lappend result [string map $m [string trim $b fo\u0000]] +} -result [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]] -cleanup { + interp alias {} bytes {} +} test string-21.1 {string wordend} { list [catch {string wordend a} msg] $msg -- cgit v0.12 From e4426ffac822281e598f797f1a787a4bd05c090b Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 13 Apr 2020 00:12:26 +0000 Subject: Convert test to not directly use identity encoding. 
--- tests/string.test | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/string.test b/tests/string.test index f6eaaf0..72b2a49 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1459,13 +1459,11 @@ test string-20.4 {string trimright} { test string-20.5 {string trimright} { string trimright "" } {} -test string-20.6 {string trim on not valid utf-8 sequence (consider NTS as continuation char), bug [c61818e4c9]} -setup { - interp alias {} bytes {} encoding convertfrom identity -} -body { +test string-20.6 {string trim on not valid utf-8 sequence (consider NTS as continuation char), bug [c61818e4c9]} { set result {} - set a [bytes \xc0\x80\x88] + set a [bytestring \xc0\x80\x88] set b foo$a - set m [list \u0000 U \x88 V [bytes \x88] W] + set m [list \u0000 U \x88 V [bytestring \x88] W] lappend result [string map $m $b] lappend result [string map $m [string trimright $b x]] lappend result [string map $m [string trimright $b \u0000]] @@ -1473,9 +1471,7 @@ test string-20.6 {string trim on not valid utf-8 sequence (consider NTS as conti lappend result [string map $m [string trimleft $b fo\u0000]] lappend result [string map $m [string trim $b fox]] lappend result [string map $m [string trim $b fo\u0000]] -} -result [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]] -cleanup { - interp alias {} bytes {} -} +} [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]] test string-21.1 {string wordend} { list [catch {string wordend a} msg] $msg -- cgit v0.12 From a8641c2da18720d07e3a0235703a9587b46d12ca Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 13 Apr 2020 01:40:18 +0000 Subject: Another test for [string trimright] that demonstrates its own failures, not those of Tcl_UtfPrev. 
--- tests/string.test | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/string.test b/tests/string.test index 72b2a49..05a0623 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1472,6 +1472,21 @@ test string-20.6 {string trim on not valid utf-8 sequence (consider NTS as conti lappend result [string map $m [string trim $b fox]] lappend result [string map $m [string trim $b fo\u0000]] } [list {*}[lrepeat 3 fooUV] {*}[lrepeat 2 UV V]] +test string-20.7 {[c61818e4c9] [string trimright] fails when UtfPrev is ok} { + set result {} + set a [bytestring \xE8\x80] + set b foo$a + set m [list \xE8 U \x80 V [bytestring \xE8] W [bytestring \x80] X] + lappend result [string map $m $b] + lappend result [string map $m [string trimright $b x]] + lappend result [string map $m [string trimright $b \xE8]] + lappend result [string map $m [string trimright $b [bytestring \xE8]]] + lappend result [string map $m [string trimright $b \x80]] + lappend result [string map $m [string trimright $b [bytestring \x80]]] + lappend result [string map $m [string trimright $b \xE8\x80]] + lappend result [string map $m [string trimright $b [bytestring \xE8\x80]]] + lappend result [string map $m [string trimright $b \u0000]] +} [list {*}[lrepeat 4 fooUV] {*}[lrepeat 2 fooU] {*}[lrepeat 2 foo] fooUV] test string-21.1 {string wordend} { list [catch {string wordend a} msg] $msg -- cgit v0.12