summaryrefslogtreecommitdiffstats
path: root/tcllib/modules/textutil/split.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'tcllib/modules/textutil/split.tcl')
-rw-r--r--tcllib/modules/textutil/split.tcl167
1 files changed, 167 insertions, 0 deletions
diff --git a/tcllib/modules/textutil/split.tcl b/tcllib/modules/textutil/split.tcl
new file mode 100644
index 0000000..4adceba
--- /dev/null
+++ b/tcllib/modules/textutil/split.tcl
@@ -0,0 +1,167 @@
+# split.tcl --
+#
+# Various ways of splitting a string.
+#
+# Copyright (c) 2000 by Ajuba Solutions.
+# Copyright (c) 2000 by Eric Melski <ericm@ajubasolutions.com>
+# Copyright (c) 2001 by Reinhard Max <max@suse.de>
+# Copyright (c) 2003 by Pat Thoyts <patthoyts@users.sourceforge.net>
+# Copyright (c) 2001-2006 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
+#
+# See the file "license.terms" for information on usage and redistribution
+# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# RCS: @(#) $Id: split.tcl,v 1.7 2006/04/21 04:42:28 andreas_kupries Exp $
+
+# ### ### ### ######### ######### #########
+## Requirements
+
+package require Tcl 8.2
+
+namespace eval ::textutil::split {}
+
+########################################################################
+# This one was written by Bob Techentin (RWT in Tcl'ers Wiki):
+# http://www.techentin.net
+# mailto:techentin.robert@mayo.edu
+#
+# Later, he send me an email stated that I can use it anywhere, because
+# no copyright was added, so the code is defacto in the public domain.
+#
+# You can found it in the Tcl'ers Wiki here:
+# http://mini.net/cgi-bin/wikit/460.html
+#
+# Bob wrote:
+# If you need to split string into list using some more complicated rule
+# than builtin split command allows, use following function. It mimics
+# Perl split operator which allows regexp as element separator, but,
+# like builtin split, it expects string to split as first arg and regexp
+# as second (optional) By default, it splits by any amount of whitespace.
+# Note that if you add parenthesis into regexp, parenthesed part of separator
+# would be added into list as additional element. Just like in Perl. -- cary
+#
+# Speed improvement by Reinhard Max:
+# Instead of repeatedly copying around the not yet matched part of the
+# string, I use [regexp]'s -start option to restrict the match to that
+# part. This reduces the complexity from something like O(n^1.5) to
+# O(n). My test case for that was:
+#
+# foreach i {1 10 100 1000 10000} {
+# set s [string repeat x $i]
+# puts [time {splitx $s .}]
+# }
+#
+
+if {[package vsatisfies [package provide Tcl] 8.3]} {
+
+ proc ::textutil::split::splitx {str {regexp {[\t \r\n]+}}} {
+ # Bugfix 476988
+ if {[string length $str] == 0} {
+ return {}
+ }
+ if {[string length $regexp] == 0} {
+ return [::split $str ""]
+ }
+ set list {}
+ set start 0
+ while {[regexp -start $start -indices -- $regexp $str match submatch]} {
+ foreach {subStart subEnd} $submatch break
+ foreach {matchStart matchEnd} $match break
+ incr matchStart -1
+ incr matchEnd
+ lappend list [string range $str $start $matchStart]
+ if {$subStart >= $start} {
+ lappend list [string range $str $subStart $subEnd]
+ }
+ set start $matchEnd
+ }
+ lappend list [string range $str $start end]
+ return $list
+ }
+
+} else {
+ # For tcl <= 8.2 we do not have regexp -start...
+ proc ::textutil::split::splitx [list str [list regexp "\[\t \r\n\]+"]] {
+
+ if {[string length $str] == 0} {
+ return {}
+ }
+ if {[string length $regexp] == 0} {
+ return [::split $str {}]
+ }
+
+ set list {}
+ while {[regexp -indices -- $regexp $str match submatch]} {
+ lappend list [string range $str 0 [expr {[lindex $match 0] -1}]]
+ if {[lindex $submatch 0] >= 0} {
+ lappend list [string range $str [lindex $submatch 0] \
+ [lindex $submatch 1]]
+ }
+ set str [string range $str [expr {[lindex $match 1]+1}] end]
+ }
+ lappend list $str
+ return $list
+ }
+
+}
+
+#
+# splitn --
+#
+# splitn splits the string $str into chunks of length $len. These
+# chunks are returned as a list.
+#
+# If $str really contains a ByteArray object (as retrieved from binary
+# encoded channels) splitn must honor this by splitting the string
+# into chunks of $len bytes.
+#
+# It is an error to call splitn with a nonpositive $len.
+#
+# If splitn is called with an empty string, it returns the empty list.
+#
+# If the length of $str is not an entire multiple of the chunk length,
+# the last chunk in the generated list will be shorter than $len.
+#
+# The implementation presented here was given by Bryan Oakley, as
+# part of a ``contest'' I staged on c.l.t in July 2004. I selected
+# this version, as it does not rely on runtime generated code, is
+# very fast for chunk size one, not too bad in all the other cases,
+# and uses [split] or [string range] which have been around for quite
+# some time.
+#
+# -- Robert Suetterlin (robert@mpe.mpg.de)
+#
+proc ::textutil::split::splitn {str {len 1}} {
+
+ if {$len <= 0} {
+ return -code error "len must be > 0"
+ }
+
+ if {$len == 1} {
+ return [split $str {}]
+ }
+
+ set result [list]
+ set max [string length $str]
+ set i 0
+ set j [expr {$len -1}]
+ while {$i < $max} {
+ lappend result [string range $str $i $j]
+ incr i $len
+ incr j $len
+ }
+
+ return $result
+}
+
+# ### ### ### ######### ######### #########
+## Data structures
+
+namespace eval ::textutil::split {
+ namespace export splitx splitn
+}
+
+# ### ### ### ######### ######### #########
+## Ready
+
+package provide textutil::split 0.7