summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog48
-rw-r--r--library/word.tcl150
2 files changed, 108 insertions, 90 deletions
diff --git a/ChangeLog b/ChangeLog
index 4270533..a40d045 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,23 +1,27 @@
+2007-08-01 Donal K. Fellows <donal.k.fellows@manchester.ac.uk>
+
+ * library/word.tcl: Rewrote for greater efficiency. [Bug 1764318]
+
2007-08-01 Pat Thoyts <patthoyts@users.sourceforge.net>
* generic/tclInt.h: Added a TclOffset macro ala Tk_Offset to
- * generic/tclVar.c: abstract out 'offsetof' which may not be
+ * generic/tclVar.c: abstract out 'offsetof' which may not be
* generic/tclExceute.c: defined (eg: msvc6).
2007-08-01 Miguel Sofer <msofer@users.sf.net>
* generic/tclVar.c (TclCleanupVar): fix [Bug 1765225], thx Larry
- Virden.
+ Virden.
2007-07-31 Miguel Sofer <msofer@users.sf.net>
* doc/Hash.3:
* generic/tclHash.c:
- * generic/tclObj.c:
- * generic/tclThreadStorage.c: (changes part of the patch below)
- Stop Tcl_CreateHashVar from resetting hPtr->clientData to NULL
- after calling the allocEntryProc for a custom table.
-
+ * generic/tclObj.c:
+ * generic/tclThreadStorage.c: (changes part of the patch below)
+ Stop Tcl_CreateHashVar from resetting hPtr->clientData to NULL after
+ calling the allocEntryProc for a custom table.
+
* generic/tcl.h:
* generic/tclBasic.c:
* generic/tclCmdIL.c:
@@ -46,27 +50,27 @@
malfunction. Var flag values and semantics changed too.
2. 'struct Bytecode' has an additional field that has to be
initialised to NULL
- 3. 'struct Namespace' is larger, as the varTable is now one
- pointer larger than a Tcl_HashTable. Direct access to its fields
- will malfunction.
- 4. 'struct CallFrame' grew one more field (the second such growth
- with respect to Tcl8.4).
- 5. api change for the functions TclFindCompiledLocal,
- TclDeleteVars and many internal functions in tclVar.c
-
- Additionally, direct access to variable hash tables via the
- standard Tcl_Hash* interface is to be considered as deprecated. It
- still works in the present version, but will be broken by further
- specialisation of these hash tables. This concerns especially the
- table of array elements in an array, as well as the varTable field
- in the Namespace struct.
+ 3. 'struct Namespace' is larger, as the varTable is now one pointer
+ larger than a Tcl_HashTable. Direct access to its fields will
+ malfunction.
+ 4. 'struct CallFrame' grew one more field (the second such growth with
+ respect to Tcl8.4).
+ 5. api change for the functions TclFindCompiledLocal, TclDeleteVars
+ and many internal functions in tclVar.c
+
+ Additionally, direct access to variable hash tables via the standard
+ Tcl_Hash* interface is to be considered as deprecated. It still works
+ in the present version, but will be broken by further specialisation
+ of these hash tables. This concerns especially the table of array
+ elements in an array, as well as the varTable field in the Namespace
+ struct.
2007-07-31 Miguel Sofer <msofer@users.sf.net>
* unix/configure.in: allow use of 'inline' in Tcl sources
* win/configure.in: [Patch 1754128]
* win/makefile.vc: Regen with autoconf 2.61
-
+
2007-07-31 Donal K. Fellows <donal.k.fellows@manchester.ac.uk>
* unix/tclUnixInit.c (TclpSetVariables): Use the thread-safe getpwuid
diff --git a/library/word.tcl b/library/word.tcl
index 05c3bab..0eb9237 100644
--- a/library/word.tcl
+++ b/library/word.tcl
@@ -1,132 +1,146 @@
# word.tcl --
#
-# This file defines various procedures for computing word boundaries
-# in strings. This file is primarily needed so Tk text and entry
-# widgets behave properly for different platforms.
+# This file defines various procedures for computing word boundaries in
+# strings. This file is primarily needed so Tk text and entry widgets behave
+# properly for different platforms.
#
# Copyright (c) 1996 by Sun Microsystems, Inc.
# Copyright (c) 1998 by Scritpics Corporation.
#
-# See the file "license.terms" for information on usage and redistribution
-# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
-#
-# RCS: @(#) $Id: word.tcl,v 1.8 2005/07/23 04:12:49 dgp Exp $
+# See the file "license.terms" for information on usage and redistribution of
+# this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# RCS: @(#) $Id: word.tcl,v 1.9 2007/08/01 15:17:12 dkf Exp $
# The following variables are used to determine which characters are
-# interpreted as white space.
+# interpreted as white space.
if {$::tcl_platform(platform) eq "windows"} {
# Windows style - any but a unicode space char
- set tcl_wordchars "\\S"
- set tcl_nonwordchars "\\s"
+ set ::tcl_wordchars {\S}
+ set ::tcl_nonwordchars {\s}
} else {
# Motif style - any unicode word char (number, letter, or underscore)
- set tcl_wordchars "\\w"
- set tcl_nonwordchars "\\W"
+ set ::tcl_wordchars {\w}
+ set ::tcl_nonwordchars {\W}
+}
+
+# Arrange for caches of the real matcher REs to be kept, which enables the REs
+# themselves to be cached for greater performance (and somewhat greater
+# clarity too).
+
+namespace eval ::tcl {
+ variable WordBreakRE
+ array set WordBreakRE {}
+
+ proc UpdateWordBreakREs args {
+ # Ignores the arguments
+ global tcl_wordchars tcl_nonwordchars
+ variable WordBreakRE
+
+ # To keep the RE strings short...
+ set letter $tcl_wordchars
+ set space $tcl_nonwordchars
+
+ set WordBreakRE(after) "$letter$space|$space$letter"
+ set WordBreakRE(before) "^.*($letter$space|$space$letter)"
+ set WordBreakRE(end) "$space*$letter+$space"
+ set WordBreakRE(next) "$letter*$space+$letter"
+ set WordBreakRE(previous) "$space*($letter+)$space*\$"
+ }
+
+ # Initialize the cache
+ UpdateWordBreakREs
+ trace add variable ::tcl_wordchars write ::tcl::UpdateWordBreakREs
+ trace add variable ::tcl_nonwordchars write ::tcl::UpdateWordBreakREs
}
# tcl_wordBreakAfter --
#
-# This procedure returns the index of the first word boundary
-# after the starting point in the given string, or -1 if there
-# are no more boundaries in the given string. The index returned refers
-# to the first character of the pair that comprises a boundary.
+# This procedure returns the index of the first word boundary after the
+# starting point in the given string, or -1 if there are no more boundaries in
+# the given string. The index returned refers to the first character of the
+# pair that comprises a boundary.
#
# Arguments:
# str - String to search.
# start - Index into string specifying starting point.
proc tcl_wordBreakAfter {str start} {
- global tcl_nonwordchars tcl_wordchars
- set str [string range $str $start end]
- if {[regexp -indices "$tcl_wordchars$tcl_nonwordchars|$tcl_nonwordchars$tcl_wordchars" $str result]} {
- return [expr {[lindex $result 1] + $start}]
- }
- return -1
+ variable ::tcl::WordBreakRE
+ set result {-1 -1}
+ regexp -indices -start $start $WordBreakRE(after) $str result
+ return [lindex $result 1]
}
# tcl_wordBreakBefore --
#
-# This procedure returns the index of the first word boundary
-# before the starting point in the given string, or -1 if there
-# are no more boundaries in the given string. The index returned
-# refers to the second character of the pair that comprises a boundary.
+# This procedure returns the index of the first word boundary before the
+# starting point in the given string, or -1 if there are no more boundaries in
+# the given string. The index returned refers to the second character of the
+# pair that comprises a boundary.
#
# Arguments:
# str - String to search.
# start - Index into string specifying starting point.
proc tcl_wordBreakBefore {str start} {
- global tcl_nonwordchars tcl_wordchars
- if {$start eq "end"} {
- set start [string length $str]
- }
- if {[regexp -indices "^.*($tcl_wordchars$tcl_nonwordchars|$tcl_nonwordchars$tcl_wordchars)" [string range $str 0 $start] result]} {
- return [lindex $result 1]
- }
- return -1
+ variable ::tcl::WordBreakRE
+ set result {-1 -1}
+ regexp -indices $WordBreakRE(before) [string range $str 0 $start] result
+ return [lindex $result 1]
}
# tcl_endOfWord --
#
-# This procedure returns the index of the first end-of-word location
-# after a starting index in the given string. An end-of-word location
-# is defined to be the first whitespace character following the first
-# non-whitespace character after the starting point. Returns -1 if
-# there are no more words after the starting point.
+# This procedure returns the index of the first end-of-word location after a
+# starting index in the given string. An end-of-word location is defined to be
+# the first whitespace character following the first non-whitespace character
+# after the starting point. Returns -1 if there are no more words after the
+# starting point.
#
# Arguments:
# str - String to search.
# start - Index into string specifying starting point.
proc tcl_endOfWord {str start} {
- global tcl_nonwordchars tcl_wordchars
- if {[regexp -indices "$tcl_nonwordchars*$tcl_wordchars+$tcl_nonwordchars" \
- [string range $str $start end] result]} {
- return [expr {[lindex $result 1] + $start}]
- }
- return -1
+ variable ::tcl::WordBreakRE
+ set result {-1 -1}
+ regexp -indices -start $start $WordBreakRE(end) $str result
+ return [lindex $result 1]
}
# tcl_startOfNextWord --
#
-# This procedure returns the index of the first start-of-word location
-# after a starting index in the given string. A start-of-word
-# location is defined to be a non-whitespace character following a
-# whitespace character. Returns -1 if there are no more start-of-word
-# locations after the starting point.
+# This procedure returns the index of the first start-of-word location after a
+# starting index in the given string. A start-of-word location is defined to
+# be a non-whitespace character following a whitespace character. Returns -1
+# if there are no more start-of-word locations after the starting point.
#
# Arguments:
# str - String to search.
# start - Index into string specifying starting point.
proc tcl_startOfNextWord {str start} {
- global tcl_nonwordchars tcl_wordchars
- if {[regexp -indices "$tcl_wordchars*$tcl_nonwordchars+$tcl_wordchars" \
- [string range $str $start end] result]} {
- return [expr {[lindex $result 1] + $start}]
- }
- return -1
+ variable ::tcl::WordBreakRE
+ set result {-1 -1}
+ regexp -indices -start $start $WordBreakRE(next) $str result
+ return [lindex $result 1]
}
# tcl_startOfPreviousWord --
#
-# This procedure returns the index of the first start-of-word location
-# before a starting index in the given string.
+# This procedure returns the index of the first start-of-word location before
+# a starting index in the given string.
#
# Arguments:
# str - String to search.
# start - Index into string specifying starting point.
proc tcl_startOfPreviousWord {str start} {
- global tcl_nonwordchars tcl_wordchars
- if {$start eq "end"} {
- set start [string length $str]
- }
- if {[regexp -indices \
- "$tcl_nonwordchars*($tcl_wordchars+)$tcl_nonwordchars*\$" \
- [string range $str 0 [expr {$start - 1}]] result word]} {
- return [lindex $word 0]
- }
- return -1
+ variable ::tcl::WordBreakRE
+ set word {-1 -1}
+ regexp -indices $WordBreakRE(previous) [string range $str 0 $start-1] \
+ result word
+ return [lindex $word 0]
}