diff options
author | dgp <dgp@users.sourceforge.net> | 2020-04-13 15:01:38 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2020-04-13 15:01:38 (GMT) |
commit | 9b529e05399279d3631a9740e3b61bb19a1af5d8 (patch) | |
tree | 0a91f7f4694d6ce28627cc80407449bb9fb0417f | |
parent | 5ca0fff784edcbd2b7eabe15c8aa7ed414ca12f2 (diff) | |
download | tcl-9b529e05399279d3631a9740e3b61bb19a1af5d8.zip tcl-9b529e05399279d3631a9740e3b61bb19a1af5d8.tar.gz tcl-9b529e05399279d3631a9740e3b61bb19a1af5d8.tar.bz2 |
Known bug test string-22.14 is not so hard to fix.
-rw-r--r-- | generic/tclCmdMZ.c | 14 | ||||
-rw-r--r-- | tests/string.test | 2 |
2 files changed, 13 insertions, 3 deletions
```diff
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index d4fa4e9..6515d98 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2396,12 +2396,22 @@ StringStartCmd(
     cur = 0;
     if (index > 0) {
         p = Tcl_UtfAtIndex(string, index);
+
+        TclUtfToUniChar(p, &ch);
         for (cur = index; cur >= 0; cur--) {
-            TclUtfToUniChar(p, &ch);
+            int delta = 0;
+            const char *next;
+
             if (!Tcl_UniCharIsWordChar(ch)) {
                 break;
             }
-            p = Tcl_UtfPrev(p, string);
+
+            next = Tcl_UtfPrev(p, string);
+            do {
+                next += delta;
+                delta = TclUtfToUniChar(next, &ch);
+            } while (next + delta < p);
+            p = next;
         }
         if (cur != index) {
             cur += 1;
diff --git a/tests/string.test b/tests/string.test
index 05a0623..54b9c95 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -1570,7 +1570,7 @@ test string-22.12 {string wordstart, unicode} {
 test string-22.13 {string wordstart, unicode} {
     string wordstart "\uc700\uc700 abc" 8
 } 3
-test string-22.14 {string wordstart, invalid UTF-8} knownBug {
+test string-22.14 {string wordstart, invalid UTF-8} {
     # See Bug c61818e4c9
     set demo [bytestring "abc def\xE0\xA9ghi"]
     string index $demo [string wordstart $demo 10]
```