From f4ba6d32f660d1c4353b87eef65be808dfcb31a0 Mon Sep 17 00:00:00 2001 From: dkf Date: Wed, 1 Aug 2007 15:17:11 +0000 Subject: Rewrote word.tcl for greater efficiency. [Bug 1764318] --- ChangeLog | 48 ++++++++++-------- library/word.tcl | 150 ++++++++++++++++++++++++++++++------------------------- 2 files changed, 108 insertions(+), 90 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4270533..a40d045 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,23 +1,27 @@ +2007-08-01 Donal K. Fellows + + * library/word.tcl: Rewrote for greater efficiency. [Bug 1764318] + 2007-08-01 Pat Thoyts * generic/tclInt.h: Added a TclOffset macro ala Tk_Offset to - * generic/tclVar.c: abstract out 'offsetof' which may not be + * generic/tclVar.c: abstract out 'offsetof' which may not be * generic/tclExceute.c: defined (eg: msvc6). 2007-08-01 Miguel Sofer * generic/tclVar.c (TclCleanupVar): fix [Bug 1765225], thx Larry - Virden. + Virden. 2007-07-31 Miguel Sofer * doc/Hash.3: * generic/tclHash.c: - * generic/tclObj.c: - * generic/tclThreadStorage.c: (changes part of the patch below) - Stop Tcl_CreateHashVar from resetting hPtr->clientData to NULL - after calling the allocEntryProc for a custom table. - + * generic/tclObj.c: + * generic/tclThreadStorage.c: (changes part of the patch below) + Stop Tcl_CreateHashVar from resetting hPtr->clientData to NULL after + calling the allocEntryProc for a custom table. + * generic/tcl.h: * generic/tclBasic.c: * generic/tclCmdIL.c: @@ -46,27 +50,27 @@ malfunction. Var flag values and semantics changed too. 2. 'struct Bytecode' has an additional field that has to be initialised to NULL - 3. 'struct Namespace' is larger, as the varTable is now one - pointer larger than a Tcl_HashTable. Direct access to its fields - will malfunction. - 4. 'struct CallFrame' grew one more field (the second such growth - with respect to Tcl8.4). - 5. api change for the functions TclFindCompiledLocal, - TclDeleteVars and many internal functions in tclVar.c - - Additionally, direct access to variable hash tables via the - standard Tcl_Hash* interface is to be considered as deprecated. It - still works in the present version, but will be broken by further - specialisation of these hash tables. This concerns especially the - table of array elements in an array, as well as the varTable field - in the Namespace struct. + 3. 'struct Namespace' is larger, as the varTable is now one pointer + larger than a Tcl_HashTable. Direct access to its fields will + malfunction. + 4. 'struct CallFrame' grew one more field (the second such growth with + respect to Tcl8.4). + 5. api change for the functions TclFindCompiledLocal, TclDeleteVars + and many internal functions in tclVar.c + + Additionally, direct access to variable hash tables via the standard + Tcl_Hash* interface is to be considered as deprecated. It still works + in the present version, but will be broken by further specialisation + of these hash tables. This concerns especially the table of array + elements in an array, as well as the varTable field in the Namespace + struct. 2007-07-31 Miguel Sofer * unix/configure.in: allow use of 'inline' in Tcl sources * win/configure.in: [Patch 1754128] * win/makefile.vc: Regen with autoconf 2.61 - + 2007-07-31 Donal K. Fellows * unix/tclUnixInit.c (TclpSetVariables): Use the thread-safe getpwuid diff --git a/library/word.tcl b/library/word.tcl index 05c3bab..0eb9237 100644 --- a/library/word.tcl +++ b/library/word.tcl @@ -1,132 +1,146 @@ # word.tcl -- # -# This file defines various procedures for computing word boundaries -# in strings. This file is primarily needed so Tk text and entry -# widgets behave properly for different platforms. +# This file defines various procedures for computing word boundaries in +# strings. This file is primarily needed so Tk text and entry widgets behave +# properly for different platforms. # # Copyright (c) 1996 by Sun Microsystems, Inc. # Copyright (c) 1998 by Scritpics Corporation. # -# See the file "license.terms" for information on usage and redistribution -# of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# RCS: @(#) $Id: word.tcl,v 1.8 2005/07/23 04:12:49 dgp Exp $ +# See the file "license.terms" for information on usage and redistribution of +# this file, and for a DISCLAIMER OF ALL WARRANTIES. +# +# RCS: @(#) $Id: word.tcl,v 1.9 2007/08/01 15:17:12 dkf Exp $ # The following variables are used to determine which characters are -# interpreted as white space. +# interpreted as white space. if {$::tcl_platform(platform) eq "windows"} { # Windows style - any but a unicode space char - set tcl_wordchars "\\S" - set tcl_nonwordchars "\\s" + set ::tcl_wordchars {\S} + set ::tcl_nonwordchars {\s} } else { # Motif style - any unicode word char (number, letter, or underscore) - set tcl_wordchars "\\w" - set tcl_nonwordchars "\\W" + set ::tcl_wordchars {\w} + set ::tcl_nonwordchars {\W} +} + +# Arrange for caches of the real matcher REs to be kept, which enables the REs +# themselves to be cached for greater performance (and somewhat greater +# clarity too). + +namespace eval ::tcl { + variable WordBreakRE + array set WordBreakRE {} + + proc UpdateWordBreakREs args { + # Ignores the arguments + global tcl_wordchars tcl_nonwordchars + variable WordBreakRE + + # To keep the RE strings short... + set letter $tcl_wordchars + set space $tcl_nonwordchars + + set WordBreakRE(after) "$letter$space|$space$letter" + set WordBreakRE(before) "^.*($letter$space|$space$letter)" + set WordBreakRE(end) "$space*$letter+$space" + set WordBreakRE(next) "$letter*$space+$letter" + set WordBreakRE(previous) "$space*($letter+)$space*\$" + } + + # Initialize the cache + UpdateWordBreakREs + trace add variable ::tcl_wordchars write ::tcl::UpdateWordBreakREs + trace add variable ::tcl_nonwordchars write ::tcl::UpdateWordBreakREs } # tcl_wordBreakAfter -- # -# This procedure returns the index of the first word boundary -# after the starting point in the given string, or -1 if there -# are no more boundaries in the given string. The index returned refers -# to the first character of the pair that comprises a boundary. +# This procedure returns the index of the first word boundary after the +# starting point in the given string, or -1 if there are no more boundaries in +# the given string. The index returned refers to the first character of the +# pair that comprises a boundary. # # Arguments: # str - String to search. # start - Index into string specifying starting point. proc tcl_wordBreakAfter {str start} { - global tcl_nonwordchars tcl_wordchars - set str [string range $str $start end] - if {[regexp -indices "$tcl_wordchars$tcl_nonwordchars|$tcl_nonwordchars$tcl_wordchars" $str result]} { - return [expr {[lindex $result 1] + $start}] - } - return -1 + variable ::tcl::WordBreakRE + set result {-1 -1} + regexp -indices -start $start $WordBreakRE(after) $str result + return [lindex $result 1] } # tcl_wordBreakBefore -- # -# This procedure returns the index of the first word boundary -# before the starting point in the given string, or -1 if there -# are no more boundaries in the given string. The index returned -# refers to the second character of the pair that comprises a boundary. +# This procedure returns the index of the first word boundary before the +# starting point in the given string, or -1 if there are no more boundaries in +# the given string. The index returned refers to the second character of the +# pair that comprises a boundary. # # Arguments: # str - String to search. # start - Index into string specifying starting point. proc tcl_wordBreakBefore {str start} { - global tcl_nonwordchars tcl_wordchars - if {$start eq "end"} { - set start [string length $str] - } - if {[regexp -indices "^.*($tcl_wordchars$tcl_nonwordchars|$tcl_nonwordchars$tcl_wordchars)" [string range $str 0 $start] result]} { - return [lindex $result 1] - } - return -1 + variable ::tcl::WordBreakRE + set result {-1 -1} + regexp -indices $WordBreakRE(before) [string range $str 0 $start] result + return [lindex $result 1] } # tcl_endOfWord -- # -# This procedure returns the index of the first end-of-word location -# after a starting index in the given string. An end-of-word location -# is defined to be the first whitespace character following the first -# non-whitespace character after the starting point. Returns -1 if -# there are no more words after the starting point. +# This procedure returns the index of the first end-of-word location after a +# starting index in the given string. An end-of-word location is defined to be +# the first whitespace character following the first non-whitespace character +# after the starting point. Returns -1 if there are no more words after the +# starting point. # # Arguments: # str - String to search. # start - Index into string specifying starting point. proc tcl_endOfWord {str start} { - global tcl_nonwordchars tcl_wordchars - if {[regexp -indices "$tcl_nonwordchars*$tcl_wordchars+$tcl_nonwordchars" \ - [string range $str $start end] result]} { - return [expr {[lindex $result 1] + $start}] - } - return -1 + variable ::tcl::WordBreakRE + set result {-1 -1} + regexp -indices -start $start $WordBreakRE(end) $str result + return [lindex $result 1] } # tcl_startOfNextWord -- # -# This procedure returns the index of the first start-of-word location -# after a starting index in the given string. A start-of-word -# location is defined to be a non-whitespace character following a -# whitespace character. Returns -1 if there are no more start-of-word -# locations after the starting point. +# This procedure returns the index of the first start-of-word location after a +# starting index in the given string. A start-of-word location is defined to +# be a non-whitespace character following a whitespace character. Returns -1 +# if there are no more start-of-word locations after the starting point. # # Arguments: # str - String to search. # start - Index into string specifying starting point. proc tcl_startOfNextWord {str start} { - global tcl_nonwordchars tcl_wordchars - if {[regexp -indices "$tcl_wordchars*$tcl_nonwordchars+$tcl_wordchars" \ - [string range $str $start end] result]} { - return [expr {[lindex $result 1] + $start}] - } - return -1 + variable ::tcl::WordBreakRE + set result {-1 -1} + regexp -indices -start $start $WordBreakRE(next) $str result + return [lindex $result 1] } # tcl_startOfPreviousWord -- # -# This procedure returns the index of the first start-of-word location -# before a starting index in the given string. +# This procedure returns the index of the first start-of-word location before +# a starting index in the given string. # # Arguments: # str - String to search. # start - Index into string specifying starting point. proc tcl_startOfPreviousWord {str start} { - global tcl_nonwordchars tcl_wordchars - if {$start eq "end"} { - set start [string length $str] - } - if {[regexp -indices \ - "$tcl_nonwordchars*($tcl_wordchars+)$tcl_nonwordchars*\$" \ - [string range $str 0 [expr {$start - 1}]] result word]} { - return [lindex $word 0] - } - return -1 + variable ::tcl::WordBreakRE + set word {-1 -1} + regexp -indices $WordBreakRE(previous) [string range $str 0 $start-1] \ + result word + return [lindex $word 0] } -- cgit v0.12