summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhobbs <hobbs>2000-02-02 22:32:07 (GMT)
committerhobbs <hobbs>2000-02-02 22:32:07 (GMT)
commit249c0cdd339420c04f811985d70a1e28cd40e9d9 (patch)
tree4385fdf7ff72732b8da6acdd0cec31c3d5d7fab4
parent178a952f93e48b971bbd9767f1665c47ce5b0c2f (diff)
downloadtcl-249c0cdd339420c04f811985d70a1e28cd40e9d9.zip
tcl-249c0cdd339420c04f811985d70a1e28cd40e9d9.tar.gz
tcl-249c0cdd339420c04f811985d70a1e28cd40e9d9.tar.bz2
* tests/regexp.test: added tests for -all and -inline switches
* doc/regexp.n: added docs for -all and -inline switches * generic/tclCmdMZ.c (Tcl_RegexpObjCmd): added extra comments for new -all and -inline switches to regexp command
-rw-r--r--ChangeLog7
-rw-r--r--doc/regexp.n56
-rw-r--r--generic/tclCmdMZ.c224
-rw-r--r--tests/regexp.test62
4 files changed, 245 insertions, 104 deletions
diff --git a/ChangeLog b/ChangeLog
index d17b26b..9f9f17b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2000-02-02 Jeff Hobbs <hobbs@scriptics.com>
+
+ * tests/regexp.test: added tests for -all and -inline switches
+ * doc/regexp.n: added docs for -all and -inline switches
+ * generic/tclCmdMZ.c (Tcl_RegexpObjCmd): added extra comments for
+ new -all and -inline switches to regexp command
+
2000-02-01 Eric Melski <ericm@scriptics.com>
* library/init.tcl: Applied patch from rfe 1734 regarding
diff --git a/doc/regexp.n b/doc/regexp.n
index 3b19420..373691a 100644
--- a/doc/regexp.n
+++ b/doc/regexp.n
@@ -4,7 +4,7 @@
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: regexp.n,v 1.7 1999/09/21 04:20:36 hobbs Exp $
+'\" RCS: @(#) $Id: regexp.n,v 1.8 2000/02/02 22:32:09 hobbs Exp $
'\"
.so man.macros
.TH regexp n 8.3 Tcl "Tcl Built-In Commands"
@@ -20,7 +20,8 @@ regexp \- Match a regular expression against a string
.SH DESCRIPTION
.PP
Determines whether the regular expression \fIexp\fR matches part or
-all of \fIstring\fR and returns 1 if it does, 0 if it doesn't.
+all of \fIstring\fR and returns 1 if it does, 0 if it doesn't, unless
+\fB-inline\fR is specified (see below).
(Regular expression matching is described in the \fBre_syntax\fR
reference page.)
.LP
@@ -38,9 +39,17 @@ If the initial arguments to \fBregexp\fR start with \fB\-\fR then
they are treated as switches. The following switches are
currently supported:
.TP 15
-\fB\-nocase\fR
-Causes upper-case characters in \fIstring\fR to be treated as
-lower case during the matching process.
+\fB\-about\fR
+Instead of attempting to match the regular expression, returns a list
+containing information about the regular expression. The first
+element of the list is a subexpression count. The second element is a
+list of property names that describe various attributes of the regular
+expression. This switch is primarily intended for debugging purposes.
+.TP 15
+\fB\-expanded\fR
+Enables use of the expanded regular expression syntax where
+whitespace and comments are ignored. This is the same as specifying
+the \fB(?x)\fR embedded option (see METASYNTAX, below).
.TP 15
\fB\-indices\fR
Changes what is stored in the \fIsubMatchVar\fRs.
@@ -49,12 +58,6 @@ each variable
will contain a list of two decimal strings giving the indices
in \fIstring\fR of the first and last characters in the matching
range of characters.
-.VS 8.1
-.TP 15
-\fB\-expanded\fR
-Enables use of the expanded regular expression syntax where
-whitespace and comments are ignored. This is the same as specifying
-the \fB(?x)\fR embedded option (see METASYNTAX, below).
.TP 15
\fB\-line\fR
Enables newline-sensitive matching. By default, newline is a
@@ -76,15 +79,32 @@ Changes the behavior of `^' and `$' (the ``anchors'') so they match the
beginning and end of a line respectively. This is the same as
specifying the \fB(?w)\fR embedded option (see METASYNTAX, below).
.TP 15
-\fB\-about\fR
-Instead of attempting to match the regular expression, returns a list
-containing information about the regular expression. The first
-element of the list is a subexpression count. The second element is a
-list of property names that describe various attributes of the regular
-expression. This switch is primarily intended for debugging purposes.
-.VE 8.1
+\fB\-nocase\fR
+Causes upper-case characters in \fIstring\fR to be treated as
+lower case during the matching process.
.VS 8.3
.TP 15
+\fB\-all\fR
+Causes the regular expression to be matched as many times as possible
+in the string, returning the total number of matches found. If this
+is specified with match variables, they will continue information for
+the last match only.
+.TP 15
+\fB\-inline\fR
+Causes the command to return the data that would otherwise be placed
+in match variables to be returned as a list. When using \fB-inline\fR,
+match variables may not be specified. If used with \fB-all\fR, the
+list will be concatenated at each iteration, such that a flat list is
+always returned. For each match iteration, the command will append the
+overall match data, plus one element for each subexpression in the
+regular expression. Examples are:
+.CS
+ regexp -inline -- {\\w(\\w)} " inlined "
+ => {in n}
+ regexp -all -inline -- {\\w(\\w)} " inlined "
+ => {in n li i ne e}
+.CE
+.TP 15
\fB\-start\fR \fIindex\fR
Specifies a character index offset into the string to start
matching the regular expression at. When using this switch, `^'
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index af3da2c..52ad4d8 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.23 1999/12/12 02:26:41 hobbs Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.24 2000/02/02 22:32:11 hobbs Exp $
*/
#include "tclInt.h"
@@ -126,27 +126,29 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
int objc; /* Number of arguments. */
Tcl_Obj *CONST objv[]; /* Argument objects. */
{
- int i, indices, match, about, offset;
+ int i, indices, match, about, offset, all, doinline, numMatchesSaved;
int cflags, eflags;
Tcl_RegExp regExpr;
- Tcl_Obj *objPtr;
+ Tcl_Obj *objPtr, *resultPtr;
Tcl_RegExpInfo info;
static char *options[] = {
- "-indices", "-nocase", "-about", "-expanded",
- "-line", "-linestop", "-lineanchor", "-start",
- "--", (char *) NULL
+ "-all", "-about", "-indices", "-inline",
+ "-expanded", "-line", "-linestop", "-lineanchor",
+ "-nocase", "-start", "--", (char *) NULL
};
enum options {
- REGEXP_INDICES, REGEXP_NOCASE, REGEXP_ABOUT, REGEXP_EXPANDED,
- REGEXP_LINE, REGEXP_LINESTOP, REGEXP_LINEANCHOR, REGEXP_START,
- REGEXP_LAST
+ REGEXP_ALL, REGEXP_ABOUT, REGEXP_INDICES, REGEXP_INLINE,
+ REGEXP_EXPANDED,REGEXP_LINE, REGEXP_LINESTOP,REGEXP_LINEANCHOR,
+ REGEXP_NOCASE, REGEXP_START, REGEXP_LAST
};
- indices = 0;
- about = 0;
- cflags = TCL_REG_ADVANCED;
- eflags = 0;
- offset = 0;
+ indices = 0;
+ about = 0;
+ cflags = TCL_REG_ADVANCED;
+ eflags = 0;
+ offset = 0;
+ all = 0;
+ doinline = 0;
for (i = 1; i < objc; i++) {
char *name;
@@ -161,10 +163,18 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
return TCL_ERROR;
}
switch ((enum options) index) {
+ case REGEXP_ALL: {
+ all = 1;
+ break;
+ }
case REGEXP_INDICES: {
indices = 1;
break;
}
+ case REGEXP_INLINE: {
+ doinline = 1;
+ break;
+ }
case REGEXP_NOCASE: {
cflags |= TCL_REG_NOCASE;
break;
@@ -209,14 +219,22 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
}
endOfForLoop:
- if (objc - i < 2 - about) {
- Tcl_WrongNumArgs(interp, 1, objv,
- "?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?");
+ if ((objc - i) < (2 - about)) {
+ Tcl_WrongNumArgs(interp, 1, objv, "?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?");
return TCL_ERROR;
}
objc -= i;
objv += i;
+ if (doinline && ((objc - 2) != 0)) {
+ /*
+ * User requested -inline, but specified match variables - a no-no.
+ */
+ Tcl_AppendResult(interp, "regexp match variables not allowed",
+ " when using -inline", (char *) NULL);
+ return TCL_ERROR;
+ }
+
regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
if (regExpr == NULL) {
return TCL_ERROR;
@@ -238,84 +256,142 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
eflags |= TCL_REG_NOTBOL;
}
- match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset /* offset */,
- objc-2 /* nmatches */, eflags);
-
- if (match < 0) {
- return TCL_ERROR;
- }
+ objc -= 2;
+ objv += 2;
+ resultPtr = Tcl_GetObjResult(interp);
- if (match == 0) {
+ if (doinline) {
/*
- * Set the interpreter's object result to an integer object w/
- * value 0.
+ * Save all the subexpressions, as we will return them as a list
*/
-
- Tcl_SetIntObj(Tcl_GetObjResult(interp), 0);
- return TCL_OK;
+ numMatchesSaved = -1;
+ } else {
+ /*
+ * Save only enough subexpressions for matches we want to keep,
+ * expect in the case of -all, where we need to keep at least
+ * one to know where to move the offset.
+ */
+ numMatchesSaved = (objc == 0) ? all : objc;
}
- /*
- * If additional variable names have been specified, return
- * index information in those variables.
- */
-
- objc -= 2;
- objv += 2;
-
- Tcl_RegExpGetInfo(regExpr, &info);
- for (i = 0; i < objc; i++) {
- Tcl_Obj *varPtr, *valuePtr, *newPtr;
-
- varPtr = objv[i];
- if (indices) {
- int start, end;
- Tcl_Obj *objs[2];
+ while (1) {
+ match = Tcl_RegExpExecObj(interp, regExpr, objPtr,
+ offset /* offset */, numMatchesSaved, eflags);
- if (i <= info.nsubs) {
- start = offset + info.matches[i].start;
- end = offset + info.matches[i].end;
+ if (match < 0) {
+ return TCL_ERROR;
+ }
+ if (match == 0) {
+ /*
+ * We want to set the value of the intepreter result only when
+ * this is the first time through the loop.
+ */
+ if (all <= 1) {
/*
- * Adjust index so it refers to the last character in the
- * match instead of the first character after the match.
+ * If inlining, set the interpreter's object result to an
+ * empty list, otherwise set it to an integer object w/
+ * value 0.
*/
-
- if (end >= offset) {
- end--;
+ if (doinline) {
+ Tcl_SetListObj(resultPtr, 0, NULL);
+ } else {
+ Tcl_SetIntObj(resultPtr, 0);
}
- } else {
- start = -1;
- end = -1;
+ return TCL_OK;
}
+ break;
+ }
- objs[0] = Tcl_NewLongObj(start);
- objs[1] = Tcl_NewLongObj(end);
+ /*
+ * If additional variable names have been specified, return
+ * index information in those variables.
+ */
- newPtr = Tcl_NewListObj(2, objs);
- } else {
- if (i <= info.nsubs) {
- newPtr = Tcl_GetRange(objPtr, offset + info.matches[i].start,
- offset + info.matches[i].end - 1);
+ Tcl_RegExpGetInfo(regExpr, &info);
+ if (doinline) {
+ /*
+ * It's the number of substitutions, plus one for the matchVar
+ * at index 0
+ */
+ objc = info.nsubs + 1;
+ }
+ for (i = 0; i < objc; i++) {
+ Tcl_Obj *newPtr;
+
+ if (indices) {
+ int start, end;
+ Tcl_Obj *objs[2];
+
+ if (i <= info.nsubs) {
+ start = offset + info.matches[i].start;
+ end = offset + info.matches[i].end;
+
+ /*
+ * Adjust index so it refers to the last character in the
+ * match instead of the first character after the match.
+ */
+
+ if (end >= offset) {
+ end--;
+ }
+ } else {
+ start = -1;
+ end = -1;
+ }
+
+ objs[0] = Tcl_NewLongObj(start);
+ objs[1] = Tcl_NewLongObj(end);
+
+ newPtr = Tcl_NewListObj(2, objs);
} else {
- newPtr = Tcl_NewObj();
-
+ if (i <= info.nsubs) {
+ newPtr = Tcl_GetRange(objPtr,
+ offset + info.matches[i].start,
+ offset + info.matches[i].end - 1);
+ } else {
+ newPtr = Tcl_NewObj();
+ }
+ }
+ if (doinline) {
+ if (Tcl_ListObjAppendElement(interp, resultPtr, newPtr)
+ != TCL_OK) {
+ Tcl_DecrRefCount(newPtr);
+ return TCL_ERROR;
+ }
+ } else {
+ Tcl_Obj *valuePtr;
+ valuePtr = Tcl_ObjSetVar2(interp, objv[i], NULL, newPtr, 0);
+ if (valuePtr == NULL) {
+ Tcl_DecrRefCount(newPtr);
+ Tcl_AppendResult(interp, "couldn't set variable \"",
+ Tcl_GetString(objv[i]), "\"", (char *) NULL);
+ return TCL_ERROR;
+ }
}
}
- valuePtr = Tcl_ObjSetVar2(interp, varPtr, NULL, newPtr, 0);
- if (valuePtr == NULL) {
- Tcl_DecrRefCount(newPtr);
- Tcl_AppendResult(interp, "couldn't set variable \"",
- Tcl_GetString(varPtr), "\"", (char *) NULL);
- return TCL_ERROR;
+
+ if (all == 0) {
+ break;
}
+ /*
+ * Adjust the offset to the character just after the last one
+ * in the matchVar and increment all to count how many times
+ * we are making a match
+ */
+ offset += info.matches[0].end;
+ all++;
}
/*
- * Set the interpreter's object result to an integer object w/ value 1.
+ * Set the interpreter's object result to an integer object
+ * with value 1 if -all wasn't specified, otherwise it's all-1
+ * (the number of times through the while - 1).
*/
-
- Tcl_SetIntObj(Tcl_GetObjResult(interp), 1);
+
+ if (!doinline) {
+ Tcl_SetIntObj(resultPtr, (all ? all-1 : 1));
+ }
return TCL_OK;
}
diff --git a/tests/regexp.test b/tests/regexp.test
index 6bff015..e8836bd 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -11,7 +11,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: regexp.test,v 1.10 1999/09/21 04:20:45 hobbs Exp $
+# RCS: @(#) $Id: regexp.test,v 1.11 2000/02/02 22:32:13 hobbs Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -190,7 +190,7 @@ test regexp-6.2 {regexp errors} {
} {1 {wrong # args: should be "regexp ?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?"}}
test regexp-6.3 {regexp errors} {
list [catch {regexp -gorp a} msg] $msg
-} {1 {bad switch "-gorp": must be -indices, -nocase, -about, -expanded, -line, -linestop, -lineanchor, -start, or --}}
+} {1 {bad switch "-gorp": must be -all, -about, -indices, -inline, -expanded, -line, -linestop, -lineanchor, -nocase, -start, or --}}
test regexp-6.4 {regexp errors} {
list [catch {regexp a( b} msg] $msg
} {1 {couldn't compile regular expression pattern: parentheses () not balanced}}
@@ -371,8 +371,9 @@ test regexp-11.8 {regsub errors, -start bad int check} {
# This test crashes on the Mac unless you increase the Stack Space to about 1
# Meg. This is probably bigger than most users want...
-
-test regexp-12.1 {macCrash} {Tcl_RegExpExec: large number of subexpressions} {
+# 8.2.3 regexp reduced stack space requirements, but this should be
+# tested again
+test regexp-12.1 {Tcl_RegExpExec: large number of subexpressions} {macCrash} {
list [regexp (.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.) abcdefghijklmnopqrstuvwxyz all a b c d e f g h i j k l m n o p q r s t u v w x y z] $all $a $b $c $d $e $f $g $h $i $j $k $l $m $n $o $p $q $r $s $t $u $v $w $x $y $z
} {1 abcdefghijklmnopqrstuvwxyz a b c d e f g h i j k l m n o p q r s t u v w x y z}
@@ -382,10 +383,7 @@ test regexp-13.1 {regsub of a very large string} {
# is in use.
set line {BEGIN_TABLE ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; END_TABLE}
- set filedata ""
- for {set i 1} {$i<200} {incr i} {
- append filedata $line
- }
+ set filedata [string repeat $line 200]
for {set i 1} {$i<10} {incr i} {
regsub -all "BEGIN_TABLE " $filedata "" newfiledata
}
@@ -415,7 +413,7 @@ test regexp-14.2 {CompileRegexp: regexp cache, different flags} {
# There is no exec on the Mac ...
-test regexp-14.3 {unixOrPc} {CompileRegexp: regexp cache, empty regexp and empty cache} {
+test regexp-14.3 {CompileRegexp: regexp cache, empty regexp and empty cache} {unixOrPc} {
makeFile {puts [regexp {} foo]} junk.tcl
exec $::tcltest::tcltest junk.tcl
} 1
@@ -462,9 +460,49 @@ test regexp-16.4 {regsub -start, \A behavior} {
lappend out [regsub -start 2 -all {\A(\w)} {abcde} {/\1} x] $x
} {5 /a/b/c/d/e 3 ab/c/d/e}
-set x 1
-set y 2
-regexp "$x$y" 123
+test regexp-17.1 {regexp -inline} {
+ regexp -inline b ababa
+} {b}
+test regexp-17.2 {regexp -inline} {
+ regexp -inline (b) ababa
+} {b b}
+test regexp-17.3 {regexp -inline -indices} {
+ regexp -inline -indices (b) ababa
+} {{1 1} {1 1}}
+test regexp-17.4 {regexp -inline} {
+ regexp -inline {\w(\d+)\w} " hello 23 there456def "
+} {e456d 456}
+test regexp-17.5 {regexp -inline no matches} {
+ regexp -inline {\w(\d+)\w} ""
+} {}
+test regexp-17.6 {regexp -inline no matches} {
+ regexp -inline hello goodbye
+} {}
+test regexp-17.7 {regexp -inline, no matchvars allowed} {
+ list [catch {regexp -inline b abc match} msg] $msg
+} {1 {regexp match variables not allowed when using -inline}}
+
+test regexp-18.1 {regexp -all} {
+ regexp -all b bbbbb
+} {5}
+test regexp-18.2 {regexp -all} {
+ regexp -all b abababbabaaaaaaaaaab
+} {6}
+test regexp-18.3 {regexp -all -inline} {
+ regexp -all -inline b abababbabaaaaaaaaaab
+} {b b b b b b}
+test regexp-18.4 {regexp -all -inline} {
+ regexp -all -inline {\w(\w)} abcdefg
+} {ab b cd d ef f}
+test regexp-18.5 {regexp -all -inline} {
+ regexp -all -inline {\w(\w)$} abcdefg
+} {fg g}
+test regexp-18.6 {regexp -all -inline} {
+ regexp -all -inline {\d+} 10:20:30:40
+} {10 20 30 40}
+test regexp-18.7 {regexp -all -inline} {
+ list [catch {regexp -all -inline b abc match} msg] $msg
+} {1 {regexp match variables not allowed when using -inline}}
# cleanup
::tcltest::cleanupTests