summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvincentdarley <vincentdarley>2004-01-07 15:20:53 (GMT)
committervincentdarley <vincentdarley>2004-01-07 15:20:53 (GMT)
commit4bed7923e3d13f9f4e7841c45e3ecfca69d74961 (patch)
tree89d6be6bd94342eaee4b3870778c5b4d6546dffe
parent742d82368f0fedd3c09530cdfc01267ca86a8e73 (diff)
downloadtk-4bed7923e3d13f9f4e7841c45e3ecfca69d74961.zip
tk-4bed7923e3d13f9f4e7841c45e3ecfca69d74961.tar.gz
tk-4bed7923e3d13f9f4e7841c45e3ecfca69d74961.tar.bz2
utf-8 aware text wordstart and wordend
-rw-r--r--ChangeLog7
-rw-r--r--generic/tkTextIndex.c31
-rw-r--r--tests/textIndex.test115
3 files changed, 126 insertions, 27 deletions
diff --git a/ChangeLog b/ChangeLog
index b8a5d28..489682b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
2004-01-07 Vince Darley <vincentdarley@users.sourceforge.net>
+ * generic/tkTextIndex.c:
+ * tests/textIndex.test: fixed bug in which 'wordstart' and
+ 'wordend' were not utf-8 aware (they haven't been changed since
+ Tk 8.0), and added tests.
+
+2004-01-07 Vince Darley <vincentdarley@users.sourceforge.net>
+
* win/tkWinMenu.c: only provide a submenu handle when the
MF_POPUP flag is given, fixing a recently-introduced crash
when submenus are disabled. Also better error checking for
diff --git a/generic/tkTextIndex.c b/generic/tkTextIndex.c
index 667cb64..60138eb 100644
--- a/generic/tkTextIndex.c
+++ b/generic/tkTextIndex.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tkTextIndex.c,v 1.13 2003/12/15 11:51:06 vincentdarley Exp $
+ * RCS: @(#) $Id: tkTextIndex.c,v 1.14 2004/01/07 15:21:02 vincentdarley Exp $
*/
#include "default.h"
@@ -1957,7 +1957,6 @@ StartEnd(textPtr, string, indexPtr)
TkTextIndex *indexPtr; /* Index to modify based on string. */
{
CONST char *p;
- int c, offset;
size_t length;
register TkTextSegment *segPtr;
int modifier;
@@ -2024,6 +2023,7 @@ StartEnd(textPtr, string, indexPtr)
} else if ((*string == 'w') && (strncmp(string, "wordend", length) == 0)
&& (length >= 5)) {
int firstChar = 1;
+ int offset;
/*
* If the current character isn't part of a word then just move
@@ -2037,15 +2037,17 @@ StartEnd(textPtr, string, indexPtr)
}
segPtr = TkTextIndexToSeg(indexPtr, &offset);
while (1) {
+ int chSize = 1;
if (segPtr->typePtr == &tkTextCharType) {
- c = segPtr->body.chars[offset];
- if (!isalnum(UCHAR(c)) && (c != '_')) {
+ Tcl_UniChar ch;
+ chSize = TclUtfToUniChar(segPtr->body.chars + offset, &ch);
+ if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
firstChar = 0;
}
- offset += 1;
- indexPtr->byteIndex += sizeof(char);
+ offset += chSize;
+ indexPtr->byteIndex += chSize;
if (offset >= segPtr->size) {
segPtr = TkTextIndexToSeg(indexPtr, &offset);
}
@@ -2062,6 +2064,7 @@ StartEnd(textPtr, string, indexPtr)
} else if ((*string == 'w') && (strncmp(string, "wordstart", length) == 0)
&& (length >= 5)) {
int firstChar = 1;
+ int offset;
if (modifier == TKINDEX_DISPLAY) {
TkTextIndexForwChars(NULL, indexPtr, 0, indexPtr,
@@ -2076,15 +2079,23 @@ StartEnd(textPtr, string, indexPtr)
segPtr = TkTextIndexToSeg(indexPtr, &offset);
while (1) {
+ int chSize = 1;
if (segPtr->typePtr == &tkTextCharType) {
- c = segPtr->body.chars[offset];
- if (!isalnum(UCHAR(c)) && (c != '_')) {
+ Tcl_UniChar ch;
+ TclUtfToUniChar(segPtr->body.chars + offset, &ch);
+ if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
+ if (offset > 0) {
+ chSize = (segPtr->body.chars + offset
+ - Tcl_UtfPrev(segPtr->body.chars + offset,
+ segPtr->body.chars));
+ }
firstChar = 0;
+ } else {
}
- offset -= 1;
- indexPtr->byteIndex -= sizeof(char);
+ offset -= chSize;
+ indexPtr->byteIndex -= chSize;
if (offset < 0) {
if (indexPtr->byteIndex < 0) {
indexPtr->byteIndex = 0;
diff --git a/tests/textIndex.test b/tests/textIndex.test
index 1c53b17..3186fe0 100644
--- a/tests/textIndex.test
+++ b/tests/textIndex.test
@@ -6,7 +6,7 @@
# Copyright (c) 1998-1999 by Scriptics Corporation.
# All rights reserved.
#
-# RCS: @(#) $Id: textIndex.test,v 1.10 2003/10/31 09:02:17 vincentdarley Exp $
+# RCS: @(#) $Id: textIndex.test,v 1.11 2004/01/07 15:20:53 vincentdarley Exp $
package require tcltest 2.1
eval tcltest::configure $argv
@@ -645,7 +645,7 @@ test textIndex-15.15 {StartEnd} {
list [catch {.t index {2.12 word}} msg] $msg
} {1 {bad text index "2.12 word"}}
-test testIndex-16.1 {TkTextPrintIndex} {
+test textIndex-16.1 {TkTextPrintIndex} {
set t [text .t2]
$t insert end \n
$t window create end -window [button $t.b]
@@ -654,7 +654,7 @@ test testIndex-16.1 {TkTextPrintIndex} {
catch {destroy $t}
} 0
-test testIndex-16.2 {TkTextPrintIndex} {
+test textIndex-16.2 {TkTextPrintIndex} {
set t [text .t2]
$t insert end \n
$t window create end -window [button $t.b]
@@ -663,7 +663,7 @@ test testIndex-16.2 {TkTextPrintIndex} {
catch {destroy $t}
} 0
-test testIndex-17.1 {Object indices} {
+test textIndex-17.1 {Object indices} {
set res {}
set t [text .t2 -height 20]
for {set i 0} {$i < 100} {incr i} {
@@ -681,7 +681,7 @@ test testIndex-17.1 {Object indices} {
list $res
} {{@0,0 1.0 @0,0 37.0}}
-test testIndex-18.1 {Object indices don't cache mark names} {
+test textIndex-18.1 {Object indices don't cache mark names} {
set res {}
text .t2
.t2 insert 1.0 1234\n1234\n1234
@@ -737,54 +737,135 @@ set str [string repeat "hello " 20]
.t insert end "$str one two three four five six seven height nine ten\n"
.t insert end "$str one two three four five six seven height nine ten\n"
-test testIndex-19.1 {Display lines} {
+test textIndex-19.1 {Display lines} {
.t index "2.7 displaylinestart"
} {2.0}
-test testIndex-19.2 {Display lines} {
+test textIndex-19.2 {Display lines} {
.t index "2.7 displaylineend"
} {2.19}
-test testIndex-19.3 {Display lines} {
+test textIndex-19.3 {Display lines} {
.t index "2.30 displaylinestart"
} {2.20}
-test testIndex-19.4 {Display lines} {
+test textIndex-19.4 {Display lines} {
.t index "2.30 displaylineend"
} {2.39}
-test testIndex-19.5 {Display lines} {
+test textIndex-19.5 {Display lines} {
.t index "2.40 displaylinestart"
} {2.40}
-test testIndex-19.6 {Display lines} {
+test textIndex-19.6 {Display lines} {
.t index "2.40 displaylineend"
} {2.59}
-test testIndex-19.7 {Display lines} {
+test textIndex-19.7 {Display lines} {
.t index "2.7 +1displaylines"
} {2.27}
-test testIndex-19.8 {Display lines} {
+test textIndex-19.8 {Display lines} {
.t index "2.7 -1displaylines"
} {1.167}
-test testIndex-19.9 {Display lines} {
+test textIndex-19.9 {Display lines} {
.t index "2.30 +1displaylines"
} {2.50}
-test testIndex-19.10 {Display lines} {
+test textIndex-19.10 {Display lines} {
.t index "2.30 -1displaylines"
} {2.10}
-test testIndex-19.11 {Display lines} {
+test textIndex-19.11 {Display lines} {
.t index "2.40 +1displaylines"
} {2.60}
-test testIndex-19.12 {Display lines} {
+test textIndex-19.12 {Display lines} {
.t index "2.40 -1displaylines"
} {2.20}
+proc text_test_word {startend chars start} {
+ destroy .t
+ text .t
+ .t insert end $chars
+ if {[regexp {end} $start]} {
+ set start [.t index "${start}chars -2c"]
+ } else {
+ set start [.t index "1.0 + ${start}chars"]
+ }
+ if {[.t compare $start >= "end-1c"]} {
+ set start "end-2c"
+ }
+ set res [.t index "$start $startend"]
+ .t count 1.0 $res
+}
+
+# Following tests copied from tests from string wordstart/end in Tcl
+
+test textIndex-21.4 {text index wordend} {
+ text_test_word wordend abc. -1
+} 3
+test textIndex-21.5 {text index wordend} {
+ text_test_word wordend abc. 100
+} 4
+test textIndex-21.6 {text index wordend} {
+ text_test_word wordend "word_one two three" 2
+} 8
+test textIndex-21.7 {text index wordend} {
+ text_test_word wordend "one .&# three" 5
+} 6
+test textIndex-21.8 {text index wordend} {
+ text_test_word worde "x.y" 0
+} 1
+test textIndex-21.9 {text index wordend} {
+ text_test_word worde "x.y" end-1
+} 2
+test textIndex-21.10 {text index wordend, unicode} {
+ text_test_word wordend "xyz\u00c7de fg" 0
+} 6
+test textIndex-21.11 {text index wordend, unicode} {
+ text_test_word wordend "xyz\uc700de fg" 0
+} 6
+test textIndex-21.12 {text index wordend, unicode} {
+ text_test_word wordend "xyz\u203fde fg" 0
+} 6
+test textIndex-21.13 {text index wordend, unicode} {
+ text_test_word wordend "xyz\u2045de fg" 0
+} 3
+test textIndex-21.14 {text index wordend, unicode} {
+ text_test_word wordend "\uc700\uc700 abc" 8
+} 6
+
+test textIndex-22.5 {text index wordstart} {
+ text_test_word wordstart "one two three_words" 400
+} 8
+test textIndex-22.6 {text index wordstart} {
+ text_test_word wordstart "one two three_words" 2
+} 0
+test textIndex-22.7 {text index wordstart} {
+ text_test_word wordstart "one two three_words" -2
+} 0
+test textIndex-22.8 {text index wordstart} {
+ text_test_word wordstart "one .*&^ three" 6
+} 6
+test textIndex-22.9 {text index wordstart} {
+ text_test_word wordstart "one two three" 4
+} 4
+test textIndex-22.10 {text index wordstart} {
+ text_test_word wordstart "one two three" end-5
+} 7
+test textIndex-22.11 {text index wordstart, unicode} {
+ text_test_word wordstart "one tw\u00c7o three" 7
+} 4
+test textIndex-22.12 {text index wordstart, unicode} {
+ text_test_word wordstart "ab\uc700\uc700 cdef ghi" 12
+} 10
+test textIndex-22.13 {text index wordstart, unicode} {
+ text_test_word wordstart "\uc700\uc700 abc" 8
+} 3
+
+
# cleanup
rename textimage {}
catch {destroy .t}