1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
|
# split.tcl --
#
# Various ways of splitting a string.
#
# Copyright (c) 2000 by Ajuba Solutions.
# Copyright (c) 2000 by Eric Melski <ericm@ajubasolutions.com>
# Copyright (c) 2001 by Reinhard Max <max@suse.de>
# Copyright (c) 2003 by Pat Thoyts <patthoyts@users.sourceforge.net>
# Copyright (c) 2001-2006 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
# RCS: @(#) $Id: split.tcl,v 1.7 2006/04/21 04:42:28 andreas_kupries Exp $
# ### ### ### ######### ######### #########
## Requirements
package require Tcl 8.2
namespace eval ::textutil::split {}
########################################################################
# This one was written by Bob Techentin (RWT in Tcl'ers Wiki):
# http://www.techentin.net
# mailto:techentin.robert@mayo.edu
#
# Later, he send me an email stated that I can use it anywhere, because
# no copyright was added, so the code is defacto in the public domain.
#
# You can found it in the Tcl'ers Wiki here:
# http://mini.net/cgi-bin/wikit/460.html
#
# Bob wrote:
# If you need to split string into list using some more complicated rule
# than builtin split command allows, use following function. It mimics
# Perl split operator which allows regexp as element separator, but,
# like builtin split, it expects string to split as first arg and regexp
# as second (optional) By default, it splits by any amount of whitespace.
# Note that if you add parenthesis into regexp, parenthesed part of separator
# would be added into list as additional element. Just like in Perl. -- cary
#
# Speed improvement by Reinhard Max:
# Instead of repeatedly copying around the not yet matched part of the
# string, I use [regexp]'s -start option to restrict the match to that
# part. This reduces the complexity from something like O(n^1.5) to
# O(n). My test case for that was:
#
# foreach i {1 10 100 1000 10000} {
# set s [string repeat x $i]
# puts [time {splitx $s .}]
# }
#
if {[package vsatisfies [package provide Tcl] 8.3]} {
proc ::textutil::split::splitx {str {regexp {[\t \r\n]+}}} {
# Bugfix 476988
if {[string length $str] == 0} {
return {}
}
if {[string length $regexp] == 0} {
return [::split $str ""]
}
set list {}
set start 0
while {[regexp -start $start -indices -- $regexp $str match submatch]} {
foreach {subStart subEnd} $submatch break
foreach {matchStart matchEnd} $match break
incr matchStart -1
incr matchEnd
lappend list [string range $str $start $matchStart]
if {$subStart >= $start} {
lappend list [string range $str $subStart $subEnd]
}
set start $matchEnd
}
lappend list [string range $str $start end]
return $list
}
} else {
# For tcl <= 8.2 we do not have regexp -start...
proc ::textutil::split::splitx [list str [list regexp "\[\t \r\n\]+"]] {
if {[string length $str] == 0} {
return {}
}
if {[string length $regexp] == 0} {
return [::split $str {}]
}
set list {}
while {[regexp -indices -- $regexp $str match submatch]} {
lappend list [string range $str 0 [expr {[lindex $match 0] -1}]]
if {[lindex $submatch 0] >= 0} {
lappend list [string range $str [lindex $submatch 0] \
[lindex $submatch 1]]
}
set str [string range $str [expr {[lindex $match 1]+1}] end]
}
lappend list $str
return $list
}
}
#
# splitn --
#
# splitn splits the string $str into chunks of length $len. These
# chunks are returned as a list.
#
# If $str really contains a ByteArray object (as retrieved from binary
# encoded channels) splitn must honor this by splitting the string
# into chunks of $len bytes.
#
# It is an error to call splitn with a nonpositive $len.
#
# If splitn is called with an empty string, it returns the empty list.
#
# If the length of $str is not an entire multiple of the chunk length,
# the last chunk in the generated list will be shorter than $len.
#
# The implementation presented here was given by Bryan Oakley, as
# part of a ``contest'' I staged on c.l.t in July 2004. I selected
# this version, as it does not rely on runtime generated code, is
# very fast for chunk size one, not too bad in all the other cases,
# and uses [split] or [string range] which have been around for quite
# some time.
#
# -- Robert Suetterlin (robert@mpe.mpg.de)
#
proc ::textutil::split::splitn {str {len 1}} {
if {$len <= 0} {
return -code error "len must be > 0"
}
if {$len == 1} {
return [split $str {}]
}
set result [list]
set max [string length $str]
set i 0
set j [expr {$len -1}]
while {$i < $max} {
lappend result [string range $str $i $j]
incr i $len
incr j $len
}
return $result
}
# ### ### ### ######### ######### #########
## Data structures
namespace eval ::textutil::split {
namespace export splitx splitn
}
# ### ### ### ######### ######### #########
## Ready
package provide textutil::split 0.7
|