From 4bed7923e3d13f9f4e7841c45e3ecfca69d74961 Mon Sep 17 00:00:00 2001 From: vincentdarley Date: Wed, 7 Jan 2004 15:20:53 +0000 Subject: utf-8 aware text wordstart and wordend --- ChangeLog | 7 +++ generic/tkTextIndex.c | 31 +++++++++----- tests/textIndex.test | 115 ++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 126 insertions(+), 27 deletions(-) diff --git a/ChangeLog b/ChangeLog index b8a5d28..489682b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2004-01-07 Vince Darley + * generic/tkTextIndex.c: + * tests/textIndex.test: fixed bug in which 'wordstart' and + 'wordend' were not utf-8 aware (they haven't been changed since + Tk 8.0), and added tests. + +2004-01-07 Vince Darley + * win/tkWinMenu.c: only provide a submenu handle when the MF_POPUP flag is given, fixing a recently-introduced crash when submenus are disabled. Also better error checking for diff --git a/generic/tkTextIndex.c b/generic/tkTextIndex.c index 667cb64..60138eb 100644 --- a/generic/tkTextIndex.c +++ b/generic/tkTextIndex.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tkTextIndex.c,v 1.13 2003/12/15 11:51:06 vincentdarley Exp $ + * RCS: @(#) $Id: tkTextIndex.c,v 1.14 2004/01/07 15:21:02 vincentdarley Exp $ */ #include "default.h" @@ -1957,7 +1957,6 @@ StartEnd(textPtr, string, indexPtr) TkTextIndex *indexPtr; /* Index to modify based on string. */ { CONST char *p; - int c, offset; size_t length; register TkTextSegment *segPtr; int modifier; @@ -2024,6 +2023,7 @@ StartEnd(textPtr, string, indexPtr) } else if ((*string == 'w') && (strncmp(string, "wordend", length) == 0) && (length >= 5)) { int firstChar = 1; + int offset; /* * If the current character isn't part of a word then just move @@ -2037,15 +2037,17 @@ StartEnd(textPtr, string, indexPtr) } segPtr = TkTextIndexToSeg(indexPtr, &offset); while (1) { + int chSize = 1; if (segPtr->typePtr == &tkTextCharType) { - c = segPtr->body.chars[offset]; - if (!isalnum(UCHAR(c)) && (c != '_')) { + Tcl_UniChar ch; + chSize = TclUtfToUniChar(segPtr->body.chars + offset, &ch); + if (!Tcl_UniCharIsWordChar(ch)) { break; } firstChar = 0; } - offset += 1; - indexPtr->byteIndex += sizeof(char); + offset += chSize; + indexPtr->byteIndex += chSize; if (offset >= segPtr->size) { segPtr = TkTextIndexToSeg(indexPtr, &offset); } @@ -2062,6 +2064,7 @@ StartEnd(textPtr, string, indexPtr) } else if ((*string == 'w') && (strncmp(string, "wordstart", length) == 0) && (length >= 5)) { int firstChar = 1; + int offset; if (modifier == TKINDEX_DISPLAY) { TkTextIndexForwChars(NULL, indexPtr, 0, indexPtr, @@ -2076,15 +2079,23 @@ StartEnd(textPtr, string, indexPtr) segPtr = TkTextIndexToSeg(indexPtr, &offset); while (1) { + int chSize = 1; if (segPtr->typePtr == &tkTextCharType) { - c = segPtr->body.chars[offset]; - if (!isalnum(UCHAR(c)) && (c != '_')) { + Tcl_UniChar ch; + TclUtfToUniChar(segPtr->body.chars + offset, &ch); + if (!Tcl_UniCharIsWordChar(ch)) { break; } + if (offset > 0) { + chSize = (segPtr->body.chars + offset + - Tcl_UtfPrev(segPtr->body.chars + offset, + segPtr->body.chars)); + } firstChar = 0; + } else { } - offset -= 1; - indexPtr->byteIndex -= sizeof(char); + offset -= chSize; + indexPtr->byteIndex -= chSize; if (offset < 0) { if (indexPtr->byteIndex < 0) { indexPtr->byteIndex = 0; diff --git a/tests/textIndex.test b/tests/textIndex.test index 1c53b17..3186fe0 100644 --- a/tests/textIndex.test +++ b/tests/textIndex.test @@ -6,7 +6,7 @@ # Copyright (c) 1998-1999 by Scriptics Corporation. # All rights reserved. # -# RCS: @(#) $Id: textIndex.test,v 1.10 2003/10/31 09:02:17 vincentdarley Exp $ +# RCS: @(#) $Id: textIndex.test,v 1.11 2004/01/07 15:20:53 vincentdarley Exp $ package require tcltest 2.1 eval tcltest::configure $argv @@ -645,7 +645,7 @@ test textIndex-15.15 {StartEnd} { list [catch {.t index {2.12 word}} msg] $msg } {1 {bad text index "2.12 word"}} -test testIndex-16.1 {TkTextPrintIndex} { +test textIndex-16.1 {TkTextPrintIndex} { set t [text .t2] $t insert end \n $t window create end -window [button $t.b] @@ -654,7 +654,7 @@ test testIndex-16.1 {TkTextPrintIndex} { catch {destroy $t} } 0 -test testIndex-16.2 {TkTextPrintIndex} { +test textIndex-16.2 {TkTextPrintIndex} { set t [text .t2] $t insert end \n $t window create end -window [button $t.b] @@ -663,7 +663,7 @@ test testIndex-16.2 {TkTextPrintIndex} { catch {destroy $t} } 0 -test testIndex-17.1 {Object indices} { +test textIndex-17.1 {Object indices} { set res {} set t [text .t2 -height 20] for {set i 0} {$i < 100} {incr i} { @@ -681,7 +681,7 @@ test testIndex-17.1 {Object indices} { list $res } {{@0,0 1.0 @0,0 37.0}} -test testIndex-18.1 {Object indices don't cache mark names} { +test textIndex-18.1 {Object indices don't cache mark names} { set res {} text .t2 .t2 insert 1.0 1234\n1234\n1234 @@ -737,54 +737,135 @@ set str [string repeat "hello " 20] .t insert end "$str one two three four five six seven height nine ten\n" .t insert end "$str one two three four five six seven height nine ten\n" -test testIndex-19.1 {Display lines} { +test textIndex-19.1 {Display lines} { .t index "2.7 displaylinestart" } {2.0} -test testIndex-19.2 {Display lines} { +test textIndex-19.2 {Display lines} { .t index "2.7 displaylineend" } {2.19} -test testIndex-19.3 {Display lines} { +test textIndex-19.3 {Display lines} { .t index "2.30 displaylinestart" } {2.20} -test testIndex-19.4 {Display lines} { +test textIndex-19.4 {Display lines} { .t index "2.30 displaylineend" } {2.39} -test testIndex-19.5 {Display lines} { +test textIndex-19.5 {Display lines} { .t index "2.40 displaylinestart" } {2.40} -test testIndex-19.6 {Display lines} { +test textIndex-19.6 {Display lines} { .t index "2.40 displaylineend" } {2.59} -test testIndex-19.7 {Display lines} { +test textIndex-19.7 {Display lines} { .t index "2.7 +1displaylines" } {2.27} -test testIndex-19.8 {Display lines} { +test textIndex-19.8 {Display lines} { .t index "2.7 -1displaylines" } {1.167} -test testIndex-19.9 {Display lines} { +test textIndex-19.9 {Display lines} { .t index "2.30 +1displaylines" } {2.50} -test testIndex-19.10 {Display lines} { +test textIndex-19.10 {Display lines} { .t index "2.30 -1displaylines" } {2.10} -test testIndex-19.11 {Display lines} { +test textIndex-19.11 {Display lines} { .t index "2.40 +1displaylines" } {2.60} -test testIndex-19.12 {Display lines} { +test textIndex-19.12 {Display lines} { .t index "2.40 -1displaylines" } {2.20} +proc text_test_word {startend chars start} { + destroy .t + text .t + .t insert end $chars + if {[regexp {end} $start]} { + set start [.t index "${start}chars -2c"] + } else { + set start [.t index "1.0 + ${start}chars"] + } + if {[.t compare $start >= "end-1c"]} { + set start "end-2c" + } + set res [.t index "$start $startend"] + .t count 1.0 $res +} + +# Following tests copied from tests from string wordstart/end in Tcl + +test textIndex-21.4 {text index wordend} { + text_test_word wordend abc. -1 +} 3 +test textIndex-21.5 {text index wordend} { + text_test_word wordend abc. 100 +} 4 +test textIndex-21.6 {text index wordend} { + text_test_word wordend "word_one two three" 2 +} 8 +test textIndex-21.7 {text index wordend} { + text_test_word wordend "one .&# three" 5 +} 6 +test textIndex-21.8 {text index wordend} { + text_test_word worde "x.y" 0 +} 1 +test textIndex-21.9 {text index wordend} { + text_test_word worde "x.y" end-1 +} 2 +test textIndex-21.10 {text index wordend, unicode} { + text_test_word wordend "xyz\u00c7de fg" 0 +} 6 +test textIndex-21.11 {text index wordend, unicode} { + text_test_word wordend "xyz\uc700de fg" 0 +} 6 +test textIndex-21.12 {text index wordend, unicode} { + text_test_word wordend "xyz\u203fde fg" 0 +} 6 +test textIndex-21.13 {text index wordend, unicode} { + text_test_word wordend "xyz\u2045de fg" 0 +} 3 +test textIndex-21.14 {text index wordend, unicode} { + text_test_word wordend "\uc700\uc700 abc" 8 +} 6 + +test textIndex-22.5 {text index wordstart} { + text_test_word wordstart "one two three_words" 400 +} 8 +test textIndex-22.6 {text index wordstart} { + text_test_word wordstart "one two three_words" 2 +} 0 +test textIndex-22.7 {text index wordstart} { + text_test_word wordstart "one two three_words" -2 +} 0 +test textIndex-22.8 {text index wordstart} { + text_test_word wordstart "one .*&^ three" 6 +} 6 +test textIndex-22.9 {text index wordstart} { + text_test_word wordstart "one two three" 4 +} 4 +test textIndex-22.10 {text index wordstart} { + text_test_word wordstart "one two three" end-5 +} 7 +test textIndex-22.11 {text index wordstart, unicode} { + text_test_word wordstart "one tw\u00c7o three" 7 +} 4 +test textIndex-22.12 {text index wordstart, unicode} { + text_test_word wordstart "ab\uc700\uc700 cdef ghi" 12 +} 10 +test textIndex-22.13 {text index wordstart, unicode} { + text_test_word wordstart "\uc700\uc700 abc" 8 +} 3 + + # cleanup rename textimage {} catch {destroy .t} -- cgit v0.12