diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2007-08-01 15:17:11 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2007-08-01 15:17:11 (GMT) |
commit | f4ba6d32f660d1c4353b87eef65be808dfcb31a0 (patch) | |
tree | 3946de5f0bad36db60101a2809d9aece99113b9e /library | |
parent | 151923cca7b86c51a2292b427fbf70e75b05bbe3 (diff) | |
download | tcl-f4ba6d32f660d1c4353b87eef65be808dfcb31a0.zip tcl-f4ba6d32f660d1c4353b87eef65be808dfcb31a0.tar.gz tcl-f4ba6d32f660d1c4353b87eef65be808dfcb31a0.tar.bz2 |
Rewrote word.tcl for greater efficiency. [Bug 1764318]
Diffstat (limited to 'library')
-rw-r--r-- | library/word.tcl | 150 |
1 files changed, 82 insertions, 68 deletions
diff --git a/library/word.tcl b/library/word.tcl index 05c3bab..0eb9237 100644 --- a/library/word.tcl +++ b/library/word.tcl @@ -1,132 +1,146 @@ # word.tcl -- # -# This file defines various procedures for computing word boundaries -# in strings. This file is primarily needed so Tk text and entry -# widgets behave properly for different platforms. +# This file defines various procedures for computing word boundaries in +# strings. This file is primarily needed so Tk text and entry widgets behave +# properly for different platforms. # # Copyright (c) 1996 by Sun Microsystems, Inc. # Copyright (c) 1998 by Scritpics Corporation. # -# See the file "license.terms" for information on usage and redistribution -# of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# RCS: @(#) $Id: word.tcl,v 1.8 2005/07/23 04:12:49 dgp Exp $ +# See the file "license.terms" for information on usage and redistribution of +# this file, and for a DISCLAIMER OF ALL WARRANTIES. +# +# RCS: @(#) $Id: word.tcl,v 1.9 2007/08/01 15:17:12 dkf Exp $ # The following variables are used to determine which characters are -# interpreted as white space. +# interpreted as white space. if {$::tcl_platform(platform) eq "windows"} { # Windows style - any but a unicode space char - set tcl_wordchars "\\S" - set tcl_nonwordchars "\\s" + set ::tcl_wordchars {\S} + set ::tcl_nonwordchars {\s} } else { # Motif style - any unicode word char (number, letter, or underscore) - set tcl_wordchars "\\w" - set tcl_nonwordchars "\\W" + set ::tcl_wordchars {\w} + set ::tcl_nonwordchars {\W} +} + +# Arrange for caches of the real matcher REs to be kept, which enables the REs +# themselves to be cached for greater performance (and somewhat greater +# clarity too). + +namespace eval ::tcl { + variable WordBreakRE + array set WordBreakRE {} + + proc UpdateWordBreakREs args { + # Ignores the arguments + global tcl_wordchars tcl_nonwordchars + variable WordBreakRE + + # To keep the RE strings short... + set letter $tcl_wordchars + set space $tcl_nonwordchars + + set WordBreakRE(after) "$letter$space|$space$letter" + set WordBreakRE(before) "^.*($letter$space|$space$letter)" + set WordBreakRE(end) "$space*$letter+$space" + set WordBreakRE(next) "$letter*$space+$letter" + set WordBreakRE(previous) "$space*($letter+)$space*\$" + } + + # Initialize the cache + UpdateWordBreakREs + trace add variable ::tcl_wordchars write ::tcl::UpdateWordBreakREs + trace add variable ::tcl_nonwordchars write ::tcl::UpdateWordBreakREs } # tcl_wordBreakAfter -- # -# This procedure returns the index of the first word boundary -# after the starting point in the given string, or -1 if there -# are no more boundaries in the given string. The index returned refers -# to the first character of the pair that comprises a boundary. +# This procedure returns the index of the first word boundary after the +# starting point in the given string, or -1 if there are no more boundaries in +# the given string. The index returned refers to the first character of the +# pair that comprises a boundary. # # Arguments: # str - String to search. # start - Index into string specifying starting point. proc tcl_wordBreakAfter {str start} { - global tcl_nonwordchars tcl_wordchars - set str [string range $str $start end] - if {[regexp -indices "$tcl_wordchars$tcl_nonwordchars|$tcl_nonwordchars$tcl_wordchars" $str result]} { - return [expr {[lindex $result 1] + $start}] - } - return -1 + variable ::tcl::WordBreakRE + set result {-1 -1} + regexp -indices -start $start $WordBreakRE(after) $str result + return [lindex $result 1] } # tcl_wordBreakBefore -- # -# This procedure returns the index of the first word boundary -# before the starting point in the given string, or -1 if there -# are no more boundaries in the given string. The index returned -# refers to the second character of the pair that comprises a boundary. +# This procedure returns the index of the first word boundary before the +# starting point in the given string, or -1 if there are no more boundaries in +# the given string. The index returned refers to the second character of the +# pair that comprises a boundary. # # Arguments: # str - String to search. # start - Index into string specifying starting point. proc tcl_wordBreakBefore {str start} { - global tcl_nonwordchars tcl_wordchars - if {$start eq "end"} { - set start [string length $str] - } - if {[regexp -indices "^.*($tcl_wordchars$tcl_nonwordchars|$tcl_nonwordchars$tcl_wordchars)" [string range $str 0 $start] result]} { - return [lindex $result 1] - } - return -1 + variable ::tcl::WordBreakRE + set result {-1 -1} + regexp -indices $WordBreakRE(before) [string range $str 0 $start] result + return [lindex $result 1] } # tcl_endOfWord -- # -# This procedure returns the index of the first end-of-word location -# after a starting index in the given string. An end-of-word location -# is defined to be the first whitespace character following the first -# non-whitespace character after the starting point. Returns -1 if -# there are no more words after the starting point. +# This procedure returns the index of the first end-of-word location after a +# starting index in the given string. An end-of-word location is defined to be +# the first whitespace character following the first non-whitespace character +# after the starting point. Returns -1 if there are no more words after the +# starting point. # # Arguments: # str - String to search. # start - Index into string specifying starting point. proc tcl_endOfWord {str start} { - global tcl_nonwordchars tcl_wordchars - if {[regexp -indices "$tcl_nonwordchars*$tcl_wordchars+$tcl_nonwordchars" \ - [string range $str $start end] result]} { - return [expr {[lindex $result 1] + $start}] - } - return -1 + variable ::tcl::WordBreakRE + set result {-1 -1} + regexp -indices -start $start $WordBreakRE(end) $str result + return [lindex $result 1] } # tcl_startOfNextWord -- # -# This procedure returns the index of the first start-of-word location -# after a starting index in the given string. A start-of-word -# location is defined to be a non-whitespace character following a -# whitespace character. Returns -1 if there are no more start-of-word -# locations after the starting point. +# This procedure returns the index of the first start-of-word location after a +# starting index in the given string. A start-of-word location is defined to +# be a non-whitespace character following a whitespace character. Returns -1 +# if there are no more start-of-word locations after the starting point. # # Arguments: # str - String to search. # start - Index into string specifying starting point. proc tcl_startOfNextWord {str start} { - global tcl_nonwordchars tcl_wordchars - if {[regexp -indices "$tcl_wordchars*$tcl_nonwordchars+$tcl_wordchars" \ - [string range $str $start end] result]} { - return [expr {[lindex $result 1] + $start}] - } - return -1 + variable ::tcl::WordBreakRE + set result {-1 -1} + regexp -indices -start $start $WordBreakRE(next) $str result + return [lindex $result 1] } # tcl_startOfPreviousWord -- # -# This procedure returns the index of the first start-of-word location -# before a starting index in the given string. +# This procedure returns the index of the first start-of-word location before +# a starting index in the given string. # # Arguments: # str - String to search. # start - Index into string specifying starting point. proc tcl_startOfPreviousWord {str start} { - global tcl_nonwordchars tcl_wordchars - if {$start eq "end"} { - set start [string length $str] - } - if {[regexp -indices \ - "$tcl_nonwordchars*($tcl_wordchars+)$tcl_nonwordchars*\$" \ - [string range $str 0 [expr {$start - 1}]] result word]} { - return [lindex $word 0] - } - return -1 + variable ::tcl::WordBreakRE + set word {-1 -1} + regexp -indices $WordBreakRE(previous) [string range $str 0 $start-1] \ + result word + return [lindex $word 0] } |