From 4519681dcd20c967abbe68795911c77c317fab4f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 23 Apr 2018 23:23:00 +0000 Subject: Bug-fix in Tcl_UtfAtIndex (for TCL_UTF_MAX=4 only). With test-case (in "string totitle") demonstrating the bug. --- generic/tclUtf.c | 8 ++++++++ tests/string.test | 11 +++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 0d88d36..c08464b 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -762,10 +762,18 @@ Tcl_UtfAtIndex( register int index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; + int len = 1; while (index-- > 0) { + len = TclUtfToUniChar(src, &ch); + src += len; + } +#if TCL_UTF_MAX == 4 + if (!len) { + /* Index points at character following High Surrogate */ src += TclUtfToUniChar(src, &ch); } +#endif return src; } diff --git a/tests/string.test b/tests/string.test index d69fda4..868fc25 100644 --- a/tests/string.test +++ b/tests/string.test @@ -24,7 +24,7 @@ catch [list package require -exact Tcltest [info patchlevel]] testConstraint testobj [expr {[info commands testobj] != {}}] testConstraint testindexobj [expr {[info commands testindexobj] != {}}] -testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}] +testConstraint tip389 [expr {[string length \U010000] == 2}] # Used for constraining memory leak tests testConstraint memory [llength [info commands memory]] @@ -291,6 +291,9 @@ test string-5.19 {string index, bytearray object out of bounds} { test string-5.20 {string index, bytearray object out of bounds} { string index [binary format I* {0x50515253 0x52}] 20 } {} +test string-5.21 {string index, surrogates, bug [11ae2be95dac9417]} tip389 { + list [string index a\U100000b 1] [string index a\U100000b 2] [string index a\U100000b 3] +} [list \U100000 {} b] proc largest_int {} { @@ -1280,7 +1283,7 @@ test string-12.22 {string range, shimmering binary/index} { binary scan $s a* x string range $s $s end } 000000001 -test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} fullutf { +test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} tip389 { list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3] } [list \U100000 {} b] @@ -1477,6 +1480,10 @@ test string-17.7 {string totitle, unicode} { test string-17.8 {string totitle, compiled} { lindex [string totitle [list aa bb [list cc]]] 0 } Aa +test string-17.9 {string totitle, surrogates, bug [11ae2be95dac9417]} tip389 { + list [string totitle a\U118c0c 1 1] [string totitle a\U118c0c 2 2] \ + [string totitle a\U118c0c 3 3] +} [list a\U118a0c a\U118c0C a\U118c0C] test string-18.1 {string trim} { list [catch {string trim} msg] $msg -- cgit v0.12