From ab39084d894caaf7e9a5e362169fd1ddcdb0460c Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 9 Apr 2020 22:27:37 +0000 Subject: Bulletproof the calls to Tcl_UtfPrev in Tcl_AppendLimitedToObj. --- generic/tclStringObj.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index aeb4285..c3c85dc 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1119,12 +1119,7 @@ Tcl_AppendLimitedToObj( { String *stringPtr; int toCopy = 0; - - if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); - } - - SetStringFromAny(NULL, objPtr); + int eLen = 0; if (length < 0) { length = (bytes ? strlen(bytes) : 0); @@ -1132,6 +1127,9 @@ Tcl_AppendLimitedToObj( if (length == 0) { return; } + if (limit <= 0) { + return; + } if (length <= limit) { toCopy = length; @@ -1139,8 +1137,12 @@ Tcl_AppendLimitedToObj( if (ellipsis == NULL) { ellipsis = "..."; } - toCopy = (bytes == NULL) ? limit - : Tcl_UtfPrev(bytes+limit+1-strlen(ellipsis), bytes) - bytes; + eLen = strlen(ellipsis); + while (eLen > limit) { + eLen = Tcl_UtfPrev(ellipsis+eLen, ellipsis) - ellipsis; + } + + toCopy = Tcl_UtfPrev(bytes+limit+1-eLen, bytes) - bytes; } /* @@ -1149,6 +1151,11 @@ Tcl_AppendLimitedToObj( * objPtr's string rep. */ + if (Tcl_IsShared(objPtr)) { + Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); + } + + SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode != 0) { AppendUtfToUnicodeRep(objPtr, bytes, toCopy); @@ -1162,9 +1169,9 @@ Tcl_AppendLimitedToObj( stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode != 0) { - AppendUtfToUnicodeRep(objPtr, ellipsis, -1); + AppendUtfToUnicodeRep(objPtr, ellipsis, eLen); } else { - AppendUtfToUtfRep(objPtr, ellipsis, -1); + AppendUtfToUtfRep(objPtr, ellipsis, eLen); } } -- cgit v0.12 From 7f995b755bfead2dfc4865e0d2fc5fb15ea7a946 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 10 Apr 2020 00:01:52 +0000 Subject: Add (disabled) test to demo the inability of [string wordstart] to handle malformed UTF-8 sequences. --- tests/string.test | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/string.test b/tests/string.test index 9a5e0c0..e1ae63a 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1542,6 +1542,11 @@ test string-22.12 {string wordstart, unicode} { test string-22.13 {string wordstart, unicode} { string wordstart "\uc700\uc700 abc" 8 } 3 +test string-22.14 {string wordstart, invalid UTF-8} knownBug { + # See Bug c61818e4c9 + set demo [bytestring "abc def\xE0\xA9ghi"] + string index $demo [string wordstart $demo 10] +} g test string-23.0 {string is boolean, Bug 1187123} testindexobj { set x 5 -- cgit v0.12 From 1aa65236ca8cf733dd41fb38b4e8ae49601d8c2a Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 11 Apr 2020 22:26:30 +0000 Subject: Repair invalid utf-8 in subst.test. --- tests/subst.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/subst.test b/tests/subst.test index 933b1c6..a809f28 100644 --- a/tests/subst.test +++ b/tests/subst.test @@ -41,7 +41,7 @@ test subst-3.1 {backslash substitutions} { subst {\x\$x\[foo bar]\\} } "x\$x\[foo bar]\\" test subst-3.2 {backslash substitutions with utf chars} { - # 'j' is just a char that doesn't mean anything, and \344 is 'ä' + # 'j' is just a char that doesn't mean anything, and \344 is 'ä' # that also doesn't mean anything, but is multi-byte in UTF-8. list [subst \j] [subst \\j] [subst \\344] [subst \\\344] } "j j \344 \344" -- cgit v0.12 From c878e211c9ae8106d3afb570e8019243b50b5554 Mon Sep 17 00:00:00 2001 From: dgp Date: Sun, 12 Apr 2020 23:47:02 +0000 Subject: [2006888] Backport conversion of test file to strict ASCII. ISO-8859-1 assumption is nonportable and increasingly invalid. Fossil does not like working with files that contain invalid UTF-8 byte sequences. --- tests/stringObj.test | 75 ++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/tests/stringObj.test b/tests/stringObj.test index 3b25592..1ac8b84 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -1,16 +1,16 @@ # Commands covered: none # -# This file contains tests for the procedures in tclStringObj.c -# that implement the Tcl type manager for the string type. +# This file contains tests for the procedures in tclStringObj.c that implement +# the Tcl type manager for the string type. # -# Sourcing this file into Tcl runs the tests and generates output for -# errors. No output means no errors were found. +# Sourcing this file into Tcl runs the tests and generates output for errors. +# No output means no errors were found. # # Copyright (c) 1995-1997 Sun Microsystems, Inc. # Copyright (c) 1998-1999 by Scriptics Corporation. # -# See the file "license.terms" for information on usage and redistribution -# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# See the file "license.terms" for information on usage and redistribution of +# this file, and for a DISCLAIMER OF ALL WARRANTIES. if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -19,7 +19,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} { testConstraint testobj [llength [info commands testobj]] testConstraint testdstring [llength [info commands testdstring]] - + test stringObj-1.1 {string type registration} testobj { set t [testobj types] set first [string first "string" $t] @@ -38,7 +38,7 @@ test stringObj-3.1 {Tcl_SetStringObj, existing "empty string" object} testobj { set result "" lappend result [testobj freeallvars] lappend result [testobj newobj 1] - lappend result [teststringobj set 1 xyz] ;# makes existing obj a string + lappend result [teststringobj set 1 xyz] ;# makes existing obj a string lappend result [testobj type 1] lappend result [testobj refcount 1] } {{} {} xyz string 2} @@ -46,7 +46,7 @@ test stringObj-3.2 {Tcl_SetStringObj, existing non-"empty string" object} testob set result "" lappend result [testobj freeallvars] lappend result [testintobj set 1 512] - lappend result [teststringobj set 1 foo] ;# makes existing obj a string + lappend result [teststringobj set 1 foo] ;# makes existing obj a string lappend result [testobj type 1] lappend result [testobj refcount 1] } {{} 512 foo string 2} @@ -202,19 +202,19 @@ test stringObj-8.1 {DupStringInternalRep procedure} testobj { [teststringobj ualloc 2] [teststringobj get 2] } {5 10 0 abcde 5 5 0 abcde} test stringObj-8.2 {DupUnicodeInternalRep, mixed width chars} testobj { - set x abcï¿®ghi + set x abc\u00ef\u00bf\u00aeghi string length $x set y $x - list [testobj objtype $x] [testobj objtype $y] [append x "®¿ï"] \ + list [testobj objtype $x] [testobj objtype $y] [append x "\u00ae\u00bf\u00ef"] \ [set y] [testobj objtype $x] [testobj objtype $y] -} {string string abcï¿®ghi®¿ï abcï¿®ghi string string} +} "string string abc\u00ef\u00bf\u00aeghi\u00ae\u00bf\u00ef abc\u00ef\u00bf\u00aeghi string string" test stringObj-8.3 {DupUnicodeInternalRep, mixed width chars} testobj { - set x abcï¿®ghi + set x abc\u00ef\u00bf\u00aeghi set y $x string length $x - list [testobj objtype $x] [testobj objtype $y] [append x "®¿ï"] \ + list [testobj objtype $x] [testobj objtype $y] [append x "\u00ae\u00bf\u00ef"] \ [set y] [testobj objtype $x] [testobj objtype $y] -} {string string abcï¿®ghi®¿ï abcï¿®ghi string string} +} "string string abc\u00ef\u00bf\u00aeghi\u00ae\u00bf\u00ef abc\u00ef\u00bf\u00aeghi string string" test stringObj-8.4 {DupUnicodeInternalRep, all byte-size chars} testobj { set x abcdefghi string length $x @@ -240,13 +240,13 @@ test stringObj-9.1 {TclAppendObjToObj, mixed src & dest} {testobj testdstring} { [set y] [testobj objtype $x] [testobj objtype $y] } "string none abc\u00ef\u00bf\u00aeghi\u00ae\u00bf\u00ef \u00ae\u00bf\u00ef string none" test stringObj-9.2 {TclAppendObjToObj, mixed src & dest} testobj { - set x abcï¿®ghi + set x abc\u00ef\u00bf\u00aeghi string length $x list [testobj objtype $x] [append x $x] [testobj objtype $x] \ [append x $x] [testobj objtype $x] -} {string abcï¿®ghiabcï¿®ghi string\ -abcï¿®ghiabcï¿®ghiabcï¿®ghiabcï¿®ghi\ -string} +} "string abc\u00ef\u00bf\u00aeghiabc\u00ef\u00bf\u00aeghi string\ +abc\u00ef\u00bf\u00aeghiabc\u00ef\u00bf\u00aeghiabc\u00ef\u00bf\u00aeghiabc\u00ef\u00bf\u00aeghi\ +string" test stringObj-9.3 {TclAppendObjToObj, mixed src & 1-byte dest} {testobj testdstring} { set x abcdefghi testdstring free @@ -301,20 +301,19 @@ test stringObj-9.9 {TclAppendObjToObj, integer src & 1-byte dest} testobj { [set y] [testobj objtype $x] [testobj objtype $y] } {string int abcdefghi9 9 string int} test stringObj-9.10 {TclAppendObjToObj, integer src & mixed dest} testobj { - set x abcï¿®ghi + set x abc\u00ef\u00bf\u00aeghi set y [expr {4 + 5}] string length $x list [testobj objtype $x] [testobj objtype $y] [append x $y] \ [set y] [testobj objtype $x] [testobj objtype $y] -} {string int abcï¿®ghi9 9 string int} +} "string int abc\u00ef\u00bf\u00aeghi9 9 string int" test stringObj-9.11 {TclAppendObjToObj, mixed src & 1-byte dest index check} testobj { # bug 2678, in <=8.2.0, the second obj (the one to append) in - # Tcl_AppendObjToObj was not correctly checked to see if it was - # all one byte chars, so a unicode string would be added as one - # byte chars. + # Tcl_AppendObjToObj was not correctly checked to see if it was all one + # byte chars, so a unicode string would be added as one byte chars. set x abcdef set len [string length $x] - set y aübåcï + set y a\u00fcb\u00e5c\u00ef set len [string length $y] append x $y string length $x @@ -323,7 +322,7 @@ test stringObj-9.11 {TclAppendObjToObj, mixed src & 1-byte dest index check} tes lappend q [string index $x $i] } set q -} {a b c d e f a ü b å c ï} +} "a b c d e f a \u00fc b \u00e5 c \u00ef" test stringObj-10.1 {Tcl_GetRange with all byte-size chars} {testobj testdstring} { testdstring free @@ -336,7 +335,7 @@ test stringObj-10.2 {Tcl_GetRange with some mixed width chars} {testobj testdstr # Because this test does not use \uXXXX notation below instead of # hardcoding the values, it may fail in multibyte locales. However, we # need to test that the parser produces untyped objects even when there - # are high-ASCII characters in the input (like "ï"). I don't know what + # are high-ASCII characters in the input (like "ï"). I don't know what # else to do but inline those characters here. testdstring free testdstring append "abc\u00ef\u00efdef" -1 @@ -345,7 +344,7 @@ test stringObj-10.2 {Tcl_GetRange with some mixed width chars} {testobj testdstr [testobj objtype $x] [testobj objtype $y] } [list none "bc\u00EF\u00EFde" string string] test stringObj-10.3 {Tcl_GetRange with some mixed width chars} testobj { - # set x "abcïïdef" + # set x "abcïïdef" # Use \uXXXX notation below instead of hardcoding the values, otherwise # the test will fail in multibyte locales. set x "abc\u00EF\u00EFdef" @@ -354,7 +353,7 @@ test stringObj-10.3 {Tcl_GetRange with some mixed width chars} testobj { [testobj objtype $x] [testobj objtype $y] } [list string "bc\u00EF\u00EFde" string string] test stringObj-10.4 {Tcl_GetRange with some mixed width chars} testobj { - # set a "ïa¿b®cï¿d®" + # set a "ïa¿b®cï¿d®" # Use \uXXXX notation below instead of hardcoding the values, otherwise # the test will fail in multibyte locales. set a "\u00EFa\u00BFb\u00AEc\u00EF\u00BFd\u00AE" @@ -389,15 +388,15 @@ test stringObj-12.3 {Tcl_GetUniChar with byte-size chars} testobj { list [string index $x end] [string index $x end-1] } {i h} test stringObj-12.4 {Tcl_GetUniChar with mixed width chars} testobj { - string index "ïa¿b®c®¿dï" 0 -} "ï" + string index "\u00efa\u00bfb\u00aec\u00ae\u00bfd\u00ef" 0 +} "\u00ef" test stringObj-12.5 {Tcl_GetUniChar} testobj { - set x "ïa¿b®c®¿dï" + set x "\u00efa\u00bfb\u00aec\u00ae\u00bfd\u00ef" list [string index $x 4] [string index $x 0] -} {® ï} +} "\u00ae \u00ef" test stringObj-12.6 {Tcl_GetUniChar} testobj { - string index "ïa¿b®cï¿d®" end -} "®" + string index "\u00efa\u00bfb\u00aec\u00ef\u00bfd\u00ae" end +} "\u00ae" test stringObj-13.1 {Tcl_GetCharLength with byte-size chars} testobj { set a "" @@ -411,16 +410,16 @@ test stringObj-13.3 {Tcl_GetCharLength with byte-size chars} testobj { list [string length $a] [string length $a] } {6 6} test stringObj-13.4 {Tcl_GetCharLength with mixed width chars} testobj { - string length "®" + string length "\u00ae" } 1 test stringObj-13.5 {Tcl_GetCharLength with mixed width chars} testobj { - # string length "○○" + # string length "○○" # Use \uXXXX notation below instead of hardcoding the values, otherwise # the test will fail in multibyte locales. string length "\u00EF\u00BF\u00AE\u00EF\u00BF\u00AE" } 6 test stringObj-13.6 {Tcl_GetCharLength with mixed width chars} testobj { - # set a "ïa¿b®cï¿d®" + # set a "ïa¿b®cï¿d®" # Use \uXXXX notation below instead of hardcoding the values, otherwise # the test will fail in multibyte locales. set a "\u00EFa\u00BFb\u00AEc\u00EF\u00BFd\u00AE" -- cgit v0.12 From 7d90cc50f22d633813d2ed620542d15a55755352 Mon Sep 17 00:00:00 2001 From: dgp Date: Sun, 12 Apr 2020 23:53:27 +0000 Subject: Stop direct use of the identity encoding in tests. --- tests/encoding.test | 4 ++-- tests/parse.test | 2 +- tests/stringObj.test | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index 498e176..8722a93 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -304,13 +304,13 @@ test encoding-15.1 {UtfToUtfProc} { test encoding-15.2 {UtfToUtfProc null character output} { set x \u0000 set y [encoding convertto utf-8 \u0000] - set y [encoding convertfrom identity $y] + set y [bytestring $y] binary scan $y H* z list [string bytelength $x] [string bytelength $y] $z } {2 1 00} test encoding-15.3 {UtfToUtfProc null character input} { - set x [encoding convertfrom identity \x00] + set x [bytestring \x00] set y [encoding convertfrom utf-8 $x] binary scan [encoding convertto identity $y] H* z list [string bytelength $x] [string bytelength $y] $z diff --git a/tests/parse.test b/tests/parse.test index cd02386..13e9839 100644 --- a/tests/parse.test +++ b/tests/parse.test @@ -916,7 +916,7 @@ test parse-15.58 {CommandComplete procedure, memory leaks} { } 1 test parse-15.59 {CommandComplete procedure} { # Test for Tcl Bug 684744 - info complete [encoding convertfrom identity "\x00;if 1 \{"] + info complete [bytestring "\x00;if 1 \{"] } 0 test parse-15.60 {CommandComplete procedure} { # Test for Tcl Bug 1968882 diff --git a/tests/stringObj.test b/tests/stringObj.test index 1ac8b84..b62b768 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -427,10 +427,10 @@ test stringObj-13.6 {Tcl_GetCharLength with mixed width chars} testobj { } {10 10} test stringObj-13.7 {Tcl_GetCharLength with identity nulls} testobj { # SF bug #684699 - string length [encoding convertfrom identity \x00] + string length [bytestring \x00] } 1 test stringObj-13.8 {Tcl_GetCharLength with identity nulls} testobj { - string length [encoding convertfrom identity \x01\x00\x02] + string length [bytestring \x01\x00\x02] } 3 test stringObj-14.1 {Tcl_SetObjLength on pure unicode object} testobj { -- cgit v0.12