From 45e646472daf2fe6beb41555fe088b929cd2ce6c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 11 Jan 2018 15:44:05 +0000 Subject: Fix behavior of Tcl_GetRange() and "string range" regarding surrogates, when Tcl is compiled with -DTCL_UTF_MAX=4. Partial fix for bug [11ae2be95dac9417]. Also, fix typo. --- generic/tclStringObj.c | 13 ++++++++++++- tests/string.test | 4 ++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 0f238cf..1b35c56 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -533,7 +533,7 @@ Tcl_GetUniChar( * * Get the Unicode form of the String object. If the object is not * already a String object, it will be converted to one. If the String - * object does not have a Unicode rep, then one is create from the UTF + * object does not have a Unicode rep, then one is created from the UTF * string format. * * Results: @@ -667,6 +667,17 @@ Tcl_GetRange( stringPtr = GET_STRING(objPtr); } +#if TCL_UTF_MAX == 4 + /* See: bug [11ae2be95dac9417] */ + if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00) + && ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) { + ++first; + } + if ((last+1numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00) + && ((stringPtr->unicode[last]&0xFC00) == 0xD800)) { + ++last; + } +#endif return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1); } diff --git a/tests/string.test b/tests/string.test index bbba5eb..53f1cfb 100644 --- a/tests/string.test +++ b/tests/string.test @@ -24,6 +24,7 @@ catch [list package require -exact Tcltest [info patchlevel]] testConstraint testobj [expr {[info commands testobj] != {}}] testConstraint testindexobj [expr {[info commands testindexobj] != {}}] +testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}] # Used for constraining memory leak tests testConstraint memory [llength [info commands memory]] @@ -1276,6 +1277,9 @@ test string-12.22 {string range, shimmering binary/index} { binary scan $s a* x string range $s $s end } 000000001 +test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} fullutf { + list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3] +} [list \U100000 {} b] test string-13.1 {string repeat} { list [catch {string repeat} msg] $msg -- cgit v0.12