From 249c0cdd339420c04f811985d70a1e28cd40e9d9 Mon Sep 17 00:00:00 2001
From: hobbs <hobbs>
Date: Wed, 2 Feb 2000 22:32:07 +0000
Subject: 	* tests/regexp.test: added tests for -all and -inline switches
 	* doc/regexp.n: added docs for -all and -inline switches 	*
 generic/tclCmdMZ.c (Tcl_RegexpObjCmd): added extra comments for 	new
 -all and -inline switches to regexp command

---
 ChangeLog          |   7 ++
 doc/regexp.n       |  56 +++++++++-----
 generic/tclCmdMZ.c | 224 +++++++++++++++++++++++++++++++++++------------------
 tests/regexp.test  |  62 ++++++++++++---
 4 files changed, 245 insertions(+), 104 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index d17b26b..9f9f17b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2000-02-02  Jeff Hobbs  <hobbs@scriptics.com>
+
+	* tests/regexp.test: added tests for -all and -inline switches
+	* doc/regexp.n: added docs for -all and -inline switches
+	* generic/tclCmdMZ.c (Tcl_RegexpObjCmd): added extra comments for
+	new -all and -inline switches to regexp command
+
 2000-02-01  Eric Melski  <ericm@scriptics.com>
 
 	* library/init.tcl: Applied patch from rfe 1734 regarding
diff --git a/doc/regexp.n b/doc/regexp.n
index 3b19420..373691a 100644
--- a/doc/regexp.n
+++ b/doc/regexp.n
@@ -4,7 +4,7 @@
 '\" See the file "license.terms" for information on usage and redistribution
 '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 '\" 
-'\" RCS: @(#) $Id: regexp.n,v 1.7 1999/09/21 04:20:36 hobbs Exp $
+'\" RCS: @(#) $Id: regexp.n,v 1.8 2000/02/02 22:32:09 hobbs Exp $
 '\" 
 .so man.macros
 .TH regexp n 8.3 Tcl "Tcl Built-In Commands"
@@ -20,7 +20,8 @@ regexp \- Match a regular expression against a string
 .SH DESCRIPTION
 .PP
 Determines whether the regular expression \fIexp\fR matches part or
-all of \fIstring\fR and returns 1 if it does, 0 if it doesn't.
+all of \fIstring\fR and returns 1 if it does, 0 if it doesn't, unless
+\fB-inline\fR is specified (see below).
 (Regular expression matching is described in the \fBre_syntax\fR
 reference page.)
 .LP
@@ -38,9 +39,17 @@ If the initial arguments to \fBregexp\fR start with \fB\-\fR then
 they are treated as switches.  The following switches are
 currently supported:
 .TP 15
-\fB\-nocase\fR
-Causes upper-case characters in \fIstring\fR to be treated as
-lower case during the matching process.
+\fB\-about\fR
+Instead of attempting to match the regular expression, returns a list
+containing information about the regular expression.  The first
+element of the list is a subexpression count.  The second element is a
+list of property names that describe various attributes of the regular
+expression. This switch is primarily intended for debugging purposes.
+.TP 15
+\fB\-expanded\fR
+Enables use of the expanded regular expression syntax where
+whitespace and comments are ignored.  This is the same as specifying
+the \fB(?x)\fR embedded option (see METASYNTAX, below).
 .TP 15
 \fB\-indices\fR
 Changes what is stored in the \fIsubMatchVar\fRs. 
@@ -49,12 +58,6 @@ each variable
 will contain a list of two decimal strings giving the indices
 in \fIstring\fR of the first and last characters in the matching
 range of characters.
-.VS 8.1
-.TP 15
-\fB\-expanded\fR
-Enables use of the expanded regular expression syntax where
-whitespace and comments are ignored.  This is the same as specifying
-the \fB(?x)\fR embedded option (see METASYNTAX, below).
 .TP 15
 \fB\-line\fR
 Enables newline-sensitive matching.  By default, newline is a
@@ -76,15 +79,32 @@ Changes the behavior of `^' and `$' (the ``anchors'') so they match the
 beginning and end of a line respectively.  This is the same as
 specifying the \fB(?w)\fR embedded option (see METASYNTAX, below).
 .TP 15
-\fB\-about\fR
-Instead of attempting to match the regular expression, returns a list
-containing information about the regular expression.  The first
-element of the list is a subexpression count.  The second element is a
-list of property names that describe various attributes of the regular
-expression. This switch is primarily intended for debugging purposes.
-.VE 8.1
+\fB\-nocase\fR
+Causes upper-case characters in \fIstring\fR to be treated as
+lower case during the matching process.
 .VS 8.3
 .TP 15
+\fB\-all\fR
+Causes the regular expression to be matched as many times as possible
+in the string, returning the total number of matches found.  If this
+is specified with match variables, they will continue information for
+the last match only.
+.TP 15
+\fB\-inline\fR
+Causes the command to return the data that would otherwise be placed
+in match variables to be returned as a list.  When using \fB-inline\fR,
+match variables may not be specified.  If used with \fB-all\fR, the
+list will be concatenated at each iteration, such that a flat list is
+always returned.  For each match iteration, the command will append the
+overall match data, plus one element for each subexpression in the
+regular expression.  Examples are:
+.CS
+    regexp -inline -- {\\w(\\w)} " inlined "
+ => {in n}
+    regexp -all -inline -- {\\w(\\w)} " inlined "
+ => {in n li i ne e}
+.CE
+.TP 15
 \fB\-start\fR \fIindex\fR
 Specifies a character index offset into the string to start
 matching the regular expression at.  When using this switch, `^'
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index af3da2c..52ad4d8 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.23 1999/12/12 02:26:41 hobbs Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.24 2000/02/02 22:32:11 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -126,27 +126,29 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     int objc;				/* Number of arguments. */
     Tcl_Obj *CONST objv[];		/* Argument objects. */
 {
-    int i, indices, match, about, offset;
+    int i, indices, match, about, offset, all, doinline, numMatchesSaved;
     int cflags, eflags;
     Tcl_RegExp regExpr;
-    Tcl_Obj *objPtr;
+    Tcl_Obj *objPtr, *resultPtr;
     Tcl_RegExpInfo info;
     static char *options[] = {
-	"-indices",	"-nocase",	"-about",	"-expanded",
-	"-line",	"-linestop",	"-lineanchor",	"-start",
-	"--",		(char *) NULL
+	"-all",		"-about",	"-indices",	"-inline",
+	"-expanded",	"-line",	"-linestop",	"-lineanchor",
+	"-nocase",	"-start",	"--",		(char *) NULL
     };
     enum options {
-	REGEXP_INDICES, REGEXP_NOCASE,	REGEXP_ABOUT,	REGEXP_EXPANDED,
-	REGEXP_LINE,	REGEXP_LINESTOP, REGEXP_LINEANCHOR,	REGEXP_START,
-	REGEXP_LAST
+	REGEXP_ALL,	REGEXP_ABOUT,	REGEXP_INDICES,	REGEXP_INLINE,
+	REGEXP_EXPANDED,REGEXP_LINE,	REGEXP_LINESTOP,REGEXP_LINEANCHOR,
+	REGEXP_NOCASE,	REGEXP_START,	REGEXP_LAST
     };
 
-    indices = 0;
-    about = 0;
-    cflags = TCL_REG_ADVANCED;
-    eflags = 0;
-    offset = 0;
+    indices	= 0;
+    about	= 0;
+    cflags	= TCL_REG_ADVANCED;
+    eflags	= 0;
+    offset	= 0;
+    all		= 0;
+    doinline	= 0;
     
     for (i = 1; i < objc; i++) {
 	char *name;
@@ -161,10 +163,18 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
 	    return TCL_ERROR;
 	}
 	switch ((enum options) index) {
+	    case REGEXP_ALL: {
+		all = 1;
+		break;
+	    }
 	    case REGEXP_INDICES: {
 		indices = 1;
 		break;
 	    }
+	    case REGEXP_INLINE: {
+		doinline = 1;
+		break;
+	    }
 	    case REGEXP_NOCASE: {
 		cflags |= TCL_REG_NOCASE;
 		break;
@@ -209,14 +219,22 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     }
 
     endOfForLoop:
-    if (objc - i < 2 - about) {
-	Tcl_WrongNumArgs(interp, 1, objv,
-		"?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?");
+    if ((objc - i) < (2 - about)) {
+	Tcl_WrongNumArgs(interp, 1, objv, "?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?");
 	return TCL_ERROR;
     }
     objc -= i;
     objv += i;
 
+    if (doinline && ((objc - 2) != 0)) {
+	/*
+	 * User requested -inline, but specified match variables - a no-no.
+	 */
+	Tcl_AppendResult(interp, "regexp match variables not allowed",
+		" when using -inline", (char *) NULL);
+	return TCL_ERROR;
+    }
+
     regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
     if (regExpr == NULL) {
 	return TCL_ERROR;
@@ -238,84 +256,142 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
 	eflags |= TCL_REG_NOTBOL;
     }
 
-    match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset /* offset */,
-	    objc-2 /* nmatches */, eflags);
-
-    if (match < 0) {
-	return TCL_ERROR;
-    }
+    objc -= 2;
+    objv += 2;
+    resultPtr = Tcl_GetObjResult(interp);
 
-    if (match == 0) {
+    if (doinline) {
 	/*
-	 * Set the interpreter's object result to an integer object w/
-	 * value 0.
+	 * Save all the subexpressions, as we will return them as a list
 	 */
-	
-	Tcl_SetIntObj(Tcl_GetObjResult(interp), 0);
-	return TCL_OK;
+	numMatchesSaved = -1;
+    } else {
+	/*
+	 * Save only enough subexpressions for matches we want to keep,
+	 * expect in the case of -all, where we need to keep at least
+	 * one to know where to move the offset.
+	 */
+	numMatchesSaved = (objc == 0) ? all : objc;
     }
 
-    /*
-     * If additional variable names have been specified, return
-     * index information in those variables.
-     */
-
-    objc -= 2;
-    objv += 2;
-
-    Tcl_RegExpGetInfo(regExpr, &info);
-    for (i = 0; i < objc; i++) {
-	Tcl_Obj *varPtr, *valuePtr, *newPtr;
-	
-	varPtr = objv[i];
-	if (indices) {
-	    int start, end;
-	    Tcl_Obj *objs[2];
+    while (1) {
+	match = Tcl_RegExpExecObj(interp, regExpr, objPtr,
+		offset /* offset */, numMatchesSaved, eflags);
 
-	    if (i <= info.nsubs) {
-		start = offset + info.matches[i].start;
-		end = offset + info.matches[i].end;
+	if (match < 0) {
+	    return TCL_ERROR;
+	}
 
+	if (match == 0) {
+	    /*
+	     * We want to set the value of the intepreter result only when
+	     * this is the first time through the loop.
+	     */
+	    if (all <= 1) {
 		/*
-		 * Adjust index so it refers to the last character in the
-		 * match instead of the first character after the match.
+		 * If inlining, set the interpreter's object result to an
+		 * empty list, otherwise set it to an integer object w/
+		 * value 0.
 		 */
-
-		if (end >= offset) {
-		    end--;
+		if (doinline) {
+		    Tcl_SetListObj(resultPtr, 0, NULL);
+		} else {
+		    Tcl_SetIntObj(resultPtr, 0);
 		}
-	    } else {
-		start = -1;
-		end = -1;
+		return TCL_OK;
 	    }
+	    break;
+	}
 
-	    objs[0] = Tcl_NewLongObj(start);
-	    objs[1] = Tcl_NewLongObj(end);
+	/*
+	 * If additional variable names have been specified, return
+	 * index information in those variables.
+	 */
 
-	    newPtr = Tcl_NewListObj(2, objs);
-	} else {
-	    if (i <= info.nsubs) {
-		newPtr = Tcl_GetRange(objPtr, offset + info.matches[i].start,
-			offset + info.matches[i].end - 1);
+	Tcl_RegExpGetInfo(regExpr, &info);
+	if (doinline) {
+	    /*
+	     * It's the number of substitutions, plus one for the matchVar
+	     * at index 0
+	     */
+	    objc = info.nsubs + 1;
+	}
+	for (i = 0; i < objc; i++) {
+	    Tcl_Obj *newPtr;
+
+	    if (indices) {
+		int start, end;
+		Tcl_Obj *objs[2];
+
+		if (i <= info.nsubs) {
+		    start = offset + info.matches[i].start;
+		    end   = offset + info.matches[i].end;
+
+		    /*
+		     * Adjust index so it refers to the last character in the
+		     * match instead of the first character after the match.
+		     */
+
+		    if (end >= offset) {
+			end--;
+		    }
+		} else {
+		    start = -1;
+		    end   = -1;
+		}
+
+		objs[0] = Tcl_NewLongObj(start);
+		objs[1] = Tcl_NewLongObj(end);
+
+		newPtr = Tcl_NewListObj(2, objs);
 	    } else {
-		newPtr = Tcl_NewObj();
-		
+		if (i <= info.nsubs) {
+		    newPtr = Tcl_GetRange(objPtr,
+			    offset + info.matches[i].start,
+			    offset + info.matches[i].end - 1);
+		} else {
+		    newPtr = Tcl_NewObj();
+		}
+	    }
+	    if (doinline) {
+		if (Tcl_ListObjAppendElement(interp, resultPtr, newPtr)
+			!= TCL_OK) {
+		    Tcl_DecrRefCount(newPtr);
+		    return TCL_ERROR;
+		}
+	    } else {
+		Tcl_Obj *valuePtr;
+		valuePtr = Tcl_ObjSetVar2(interp, objv[i], NULL, newPtr, 0);
+		if (valuePtr == NULL) {
+		    Tcl_DecrRefCount(newPtr);
+		    Tcl_AppendResult(interp, "couldn't set variable \"",
+			    Tcl_GetString(objv[i]), "\"", (char *) NULL);
+		    return TCL_ERROR;
+		}
 	    }
 	}
-	valuePtr = Tcl_ObjSetVar2(interp, varPtr, NULL, newPtr, 0);
-	if (valuePtr == NULL) {
-	    Tcl_DecrRefCount(newPtr);
-	    Tcl_AppendResult(interp, "couldn't set variable \"",
-		    Tcl_GetString(varPtr), "\"", (char *) NULL);
-	    return TCL_ERROR;
+
+	if (all == 0) {
+	    break;
 	}
+	/*
+	 * Adjust the offset to the character just after the last one
+	 * in the matchVar and increment all to count how many times
+	 * we are making a match
+	 */
+	offset += info.matches[0].end;
+	all++;
     }
 
     /*
-     * Set the interpreter's object result to an integer object w/ value 1. 
+     * Set the interpreter's object result to an integer object
+     * with value 1 if -all wasn't specified, otherwise it's all-1
+     * (the number of times through the while - 1).
      */
-	
-    Tcl_SetIntObj(Tcl_GetObjResult(interp), 1);
+
+    if (!doinline) {
+	Tcl_SetIntObj(resultPtr, (all ? all-1 : 1));
+    }
     return TCL_OK;
 }
 
diff --git a/tests/regexp.test b/tests/regexp.test
index 6bff015..e8836bd 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -11,7 +11,7 @@
 # See the file "license.terms" for information on usage and redistribution
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 #
-# RCS: @(#) $Id: regexp.test,v 1.10 1999/09/21 04:20:45 hobbs Exp $
+# RCS: @(#) $Id: regexp.test,v 1.11 2000/02/02 22:32:13 hobbs Exp $
 
 if {[lsearch [namespace children] ::tcltest] == -1} {
     package require tcltest
@@ -190,7 +190,7 @@ test regexp-6.2 {regexp errors} {
 } {1 {wrong # args: should be "regexp ?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?"}}
 test regexp-6.3 {regexp errors} {
     list [catch {regexp -gorp a} msg] $msg
-} {1 {bad switch "-gorp": must be -indices, -nocase, -about, -expanded, -line, -linestop, -lineanchor, -start, or --}}
+} {1 {bad switch "-gorp": must be -all, -about, -indices, -inline, -expanded, -line, -linestop, -lineanchor, -nocase, -start, or --}}
 test regexp-6.4 {regexp errors} {
     list [catch {regexp a( b} msg] $msg
 } {1 {couldn't compile regular expression pattern: parentheses () not balanced}}
@@ -371,8 +371,9 @@ test regexp-11.8 {regsub errors, -start bad int check} {
 
 # This test crashes on the Mac unless you increase the Stack Space to about 1
 # Meg.  This is probably bigger than most users want... 
-
-test regexp-12.1 {macCrash} {Tcl_RegExpExec: large number of subexpressions} {
+# 8.2.3 regexp reduced stack space requirements, but this should be
+# tested again
+test regexp-12.1 {Tcl_RegExpExec: large number of subexpressions} {macCrash} {
     list [regexp (.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.) abcdefghijklmnopqrstuvwxyz all a b c d e f g h i j k l m n o p q r s t u v w x y z] $all $a $b $c $d $e $f $g $h $i $j $k $l $m $n $o $p $q $r $s $t $u $v $w $x $y $z
 } {1 abcdefghijklmnopqrstuvwxyz a b c d e f g h i j k l m n o p q r s t u v w x y z}
 
@@ -382,10 +383,7 @@ test regexp-13.1 {regsub of a very large string} {
     # is in use.
 
     set line {BEGIN_TABLE ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; END_TABLE}
-    set filedata ""
-    for {set i 1} {$i<200} {incr i} {
-	append filedata $line
-    }
+    set filedata [string repeat $line 200]
     for {set i 1} {$i<10} {incr i} {
 	regsub -all "BEGIN_TABLE " $filedata "" newfiledata
     }
@@ -415,7 +413,7 @@ test regexp-14.2 {CompileRegexp: regexp cache, different flags} {
 
 # There is no exec on the Mac ...
 
-test regexp-14.3 {unixOrPc} {CompileRegexp: regexp cache, empty regexp and empty cache} {
+test regexp-14.3 {CompileRegexp: regexp cache, empty regexp and empty cache} {unixOrPc} {
     makeFile {puts [regexp {} foo]} junk.tcl
     exec $::tcltest::tcltest junk.tcl
 } 1
@@ -462,9 +460,49 @@ test regexp-16.4 {regsub -start, \A behavior} {
     lappend out [regsub -start 2 -all {\A(\w)} {abcde} {/\1} x] $x
 } {5 /a/b/c/d/e 3 ab/c/d/e}
 
-set x 1
-set y 2
-regexp "$x$y" 123
+test regexp-17.1 {regexp -inline} {
+    regexp -inline b ababa
+} {b}
+test regexp-17.2 {regexp -inline} {
+    regexp -inline (b) ababa
+} {b b}
+test regexp-17.3 {regexp -inline -indices} {
+    regexp -inline -indices (b) ababa
+} {{1 1} {1 1}}
+test regexp-17.4 {regexp -inline} {
+    regexp -inline {\w(\d+)\w} "   hello 23 there456def "
+} {e456d 456}
+test regexp-17.5 {regexp -inline no matches} {
+    regexp -inline {\w(\d+)\w} ""
+} {}
+test regexp-17.6 {regexp -inline no matches} {
+    regexp -inline hello goodbye
+} {}
+test regexp-17.7 {regexp -inline, no matchvars allowed} {
+    list [catch {regexp -inline b abc match} msg] $msg
+} {1 {regexp match variables not allowed when using -inline}}
+
+test regexp-18.1 {regexp -all} {
+    regexp -all b bbbbb
+} {5}
+test regexp-18.2 {regexp -all} {
+    regexp -all b abababbabaaaaaaaaaab
+} {6}
+test regexp-18.3 {regexp -all -inline} {
+    regexp -all -inline b abababbabaaaaaaaaaab
+} {b b b b b b}
+test regexp-18.4 {regexp -all -inline} {
+    regexp -all -inline {\w(\w)} abcdefg
+} {ab b cd d ef f}
+test regexp-18.5 {regexp -all -inline} {
+    regexp -all -inline {\w(\w)$} abcdefg
+} {fg g}
+test regexp-18.6 {regexp -all -inline} {
+    regexp -all -inline {\d+} 10:20:30:40
+} {10 20 30 40}
+test regexp-18.7 {regexp -all -inline} {
+    list [catch {regexp -all -inline b abc match} msg] $msg
+} {1 {regexp match variables not allowed when using -inline}}
 
 # cleanup
 ::tcltest::cleanupTests
-- 
cgit v0.12