library/word.tcl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152

# word.tcl --
#
# This file defines various procedures for computing word boundaries in
# strings. This file is primarily needed so Tk text and entry widgets behave
# properly for different platforms.
#
# Copyright (c) 1996 by Sun Microsystems, Inc.
# Copyright (c) 1998 by Scritpics Corporation.
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.

# The following variables are used to determine which characters are
# interpreted as white space.

if {$::tcl_platform(platform) eq "windows"} {
    # Windows style - any but a unicode space char
    if {![info exists ::tcl_wordchars]} {
	set ::tcl_wordchars {\S}
    }
    if {![info exists ::tcl_nonwordchars]} {
	set ::tcl_nonwordchars {\s}
    }
} else {
    # Motif style - any unicode word char (number, letter, or underscore)
    if {![info exists ::tcl_wordchars]} {
	set ::tcl_wordchars {\w}
    }
    if {![info exists ::tcl_nonwordchars]} {
	set ::tcl_nonwordchars {\W}
    }
}

# Arrange for caches of the real matcher REs to be kept, which enables the REs
# themselves to be cached for greater performance (and somewhat greater
# clarity too).

namespace eval ::tcl {
    variable WordBreakRE
    array set WordBreakRE {}

    proc UpdateWordBreakREs args {
	# Ignores the arguments
	global tcl_wordchars tcl_nonwordchars
	variable WordBreakRE

	# To keep the RE strings short...
	set letter $tcl_wordchars
	set space $tcl_nonwordchars

	set WordBreakRE(after)		"$letter$space|$space$letter"
	set WordBreakRE(before)		"^.*($letter$space|$space$letter)"
	set WordBreakRE(end)		"$space*$letter+$space"
	set WordBreakRE(next)		"$letter*$space+$letter"
	set WordBreakRE(previous)	"$space*($letter+)$space*\$"
    }

    # Initialize the cache
    UpdateWordBreakREs
    trace add variable ::tcl_wordchars write ::tcl::UpdateWordBreakREs
    trace add variable ::tcl_nonwordchars write ::tcl::UpdateWordBreakREs
}

# tcl_wordBreakAfter --
#
# This procedure returns the index of the first word boundary after the
# starting point in the given string, or -1 if there are no more boundaries in
# the given string. The index returned refers to the first character of the
# pair that comprises a boundary.
#
# Arguments:
# str -		String to search.
# start -	Index into string specifying starting point.

proc tcl_wordBreakAfter {str start} {
    variable ::tcl::WordBreakRE
    set result {-1 -1}
    regexp -indices -start $start -- $WordBreakRE(after) $str result
    return [lindex $result 1]
}

# tcl_wordBreakBefore --
#
# This procedure returns the index of the first word boundary before the
# starting point in the given string, or -1 if there are no more boundaries in
# the given string. The index returned refers to the second character of the
# pair that comprises a boundary.
#
# Arguments:
# str -		String to search.
# start -	Index into string specifying starting point.

proc tcl_wordBreakBefore {str start} {
    variable ::tcl::WordBreakRE
    set result {-1 -1}
    regexp -indices -- $WordBreakRE(before) [string range $str 0 $start] result
    return [lindex $result 1]
}

# tcl_endOfWord --
#
# This procedure returns the index of the first end-of-word location after a
# starting index in the given string. An end-of-word location is defined to be
# the first whitespace character following the first non-whitespace character
# after the starting point. Returns -1 if there are no more words after the
# starting point.
#
# Arguments:
# str -		String to search.
# start -	Index into string specifying starting point.

proc tcl_endOfWord {str start} {
    variable ::tcl::WordBreakRE
    set result {-1 -1}
    regexp -indices -start $start -- $WordBreakRE(end) $str result
    return [lindex $result 1]
}

# tcl_startOfNextWord --
#
# This procedure returns the index of the first start-of-word location after a
# starting index in the given string. A start-of-word location is defined to
# be a non-whitespace character following a whitespace character. Returns -1
# if there are no more start-of-word locations after the starting point.
#
# Arguments:
# str -		String to search.
# start -	Index into string specifying starting point.

proc tcl_startOfNextWord {str start} {
    variable ::tcl::WordBreakRE
    set result {-1 -1}
    regexp -indices -start $start -- $WordBreakRE(next) $str result
    return [lindex $result 1]
}

# tcl_startOfPreviousWord --
#
# This procedure returns the index of the first start-of-word location before
# a starting index in the given string.
#
# Arguments:
# str -		String to search.
# start -	Index into string specifying starting point.

proc tcl_startOfPreviousWord {str start} {
    variable ::tcl::WordBreakRE
    set word {-1 -1}
    regexp -indices -- $WordBreakRE(previous) [string range $str 0 $start-1] \
	    result word
    return [lindex $word 0]
}