From 6ae96a37e6162c036ed6bcda3336270182bf6d8b Mon Sep 17 00:00:00 2001 From: dkf Date: Sun, 14 Dec 2003 18:32:36 +0000 Subject: TIP#75 Implementation --- ChangeLog | 6 ++ doc/switch.n | 33 ++++++++- generic/tclCmdMZ.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++------ tests/switch.test | 97 ++++++++++++++++++++++++++- 4 files changed, 305 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index 54537f0..47978f0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2003-12-14 Donal K. Fellows + + * generic/tclCmdMZ.c (Tcl_SwitchObjCmd): TIP#75 Implementation + * tests/switch.test: Can now get submatch information when + * doc/switch.n: using -regexp matching in [switch]. + 2003-12-14 Vince Darley * generic/tclPathObj.c: complete rewrite of generic file diff --git a/doc/switch.n b/doc/switch.n index a17dd93..2ef1cd2 100644 --- a/doc/switch.n +++ b/doc/switch.n @@ -5,7 +5,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: switch.n,v 1.5 2000/09/07 14:27:51 poenitz Exp $ +'\" RCS: @(#) $Id: switch.n,v 1.6 2003/12/14 18:32:36 dkf Exp $ '\" .so man.macros .TH switch n 7.0 Tcl "Tcl Built-In Commands" @@ -48,6 +48,37 @@ When matching \fIstring\fR to the patterns, use glob-style matching When matching \fIstring\fR to the patterns, use regular expression matching (as described in the \fBre_syntax\fR reference page). +'\" Options defined by TIP#75 +.VS 8.5 +.TP 10 +\fB\-matchvar\fR \fIvarName\fR +This option (only legal when \fB\-regexp\fR is also specified) +specifies the name of a variable into which the list of matches +found by the regular expression engine will be written. The first +element of the list written will be the overall substring of the input +string (i.e. the \fIstring\fR argument to \fBswitch\fR) matched, the +second element of the list will be the substring matched by the first +capturing parenthesis in the regular expression that matched, and so +on. When a \fBdefault\fR branch is taken, the variable will have the +empty list written to it. This option may be specified at the same +time as the \fB\-indexvar\fR option. +.TP 10 +\fB\-indexvar\fR \fIvarName\fR +This option (only legal when \fB\-regexp\fR is also specified) +specifies the name of a variable into which the list of indices +referring to matching substrings +found by the regular expression engine will be written. The first +element of the list written will be a two-element list specifying the +index of the start and index of the first character after the end of +the overall substring of the input +string (i.e. the \fIstring\fR argument to \fBswitch\fR) matched, in a +similar way to the \fB\-indices\fR option to the \fBregexp\fR can +obtain. Similarly, the second element of the list refers to the first +capturing parenthesis in the regular expression that matched, and so +on. When a \fBdefault\fR branch is taken, the variable will have the +empty list written to it. This option may be specified at the same +time as the \fB\-matchvar\fR option. +.VE 8.5 .TP 10 \fB\-\|\-\fR Marks the end of options. The argument following this one will diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 2a78838..d1961b7 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -10,11 +10,12 @@ * Copyright (c) 1994-1997 Sun Microsystems, Inc. * Copyright (c) 1998-2000 Scriptics Corporation. * Copyright (c) 2002 ActiveState Corporation. + * Copyright (c) 2003 Donal K. Fellows. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.96 2003/10/14 18:23:39 vincentdarley Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.97 2003/12/14 18:32:36 dkf Exp $ */ #include "tclInt.h" @@ -2561,19 +2562,23 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) int objc; /* Number of arguments. */ Tcl_Obj *CONST objv[]; /* Argument objects. */ { - int i, j, index, mode, matched, result, splitObjs; + int i, j, index, mode, matched, result, splitObjs, numMatchesSaved; char *string, *pattern; - Tcl_Obj *stringObj; + Tcl_Obj *stringObj, *indexVarObj, *matchVarObj; Tcl_Obj *CONST *savedObjv = objv; + Tcl_RegExp regExpr; static CONST char *options[] = { - "-exact", "-glob", "-regexp", "--", + "-exact", "-glob", "-indexvar", "-matchvar", "-regexp", "--", NULL }; enum options { - OPT_EXACT, OPT_GLOB, OPT_REGEXP, OPT_LAST + OPT_EXACT, OPT_GLOB, OPT_INDEXV, OPT_MATCHV, OPT_REGEXP, OPT_LAST }; mode = OPT_EXACT; + indexVarObj = NULL; + matchVarObj = NULL; + numMatchesSaved = 0; for (i = 1; i < objc; i++) { string = Tcl_GetString(objv[i]); if (string[0] != '-') { @@ -2587,7 +2592,35 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) i++; break; } - mode = index; + + /* + * Check for TIP#75 options specifying the variables to write + * regexp information into. + */ + + if (index == OPT_INDEXV) { + i++; + if (i == objc) { + Tcl_AppendResult(interp, + "missing variable name argument to -indexvar option", + (char *) NULL); + return TCL_ERROR; + } + indexVarObj = objv[i]; + numMatchesSaved = -1; + } else if (index == OPT_MATCHV) { + i++; + if (i == objc) { + Tcl_AppendResult(interp, + "missing variable name argument to -matchvar option", + (char *) NULL); + return TCL_ERROR; + } + matchVarObj = objv[i]; + numMatchesSaved = -1; + } else { + mode = index; + } } if (objc - i < 2) { @@ -2595,6 +2628,16 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) "?switches? string pattern body ... ?default body?"); return TCL_ERROR; } + if (indexVarObj != NULL && mode != OPT_REGEXP) { + Tcl_AppendResult(interp, + "-indexvar option requires -regexp option", (char *) NULL); + return TCL_ERROR; + } + if (matchVarObj != NULL && mode != OPT_REGEXP) { + Tcl_AppendResult(interp, + "-matchvar option requires -regexp option", (char *) NULL); + return TCL_ERROR; + } stringObj = objv[i]; objc -= i + 1; @@ -2682,22 +2725,57 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) if ((i == objc - 2) && (*pattern == 'd') && (strcmp(pattern, "default") == 0)) { + Tcl_Obj *emptyObj = NULL; + matched = 1; + /* + * If either indexVarObj or matchVarObj are non-NULL, + * we're in REGEXP mode but have reached the default + * clause anyway. TIP#75 specifies that we set the + * variables to empty lists (== empty objects) in that + * case. + */ + if (indexVarObj != NULL) { + TclNewObj(emptyObj); + if (Tcl_ObjSetVar2(interp, indexVarObj, NULL, emptyObj, + TCL_LEAVE_ERR_MSG) == NULL) { + Tcl_DecrRefCount(emptyObj); + return TCL_ERROR; + } + } + if (matchVarObj != NULL) { + if (emptyObj == NULL) { + TclNewObj(emptyObj); + } + if (Tcl_ObjSetVar2(interp, matchVarObj, NULL, emptyObj, + TCL_LEAVE_ERR_MSG) == NULL) { + if (indexVarObj == NULL) { + Tcl_DecrRefCount(emptyObj); + } + return TCL_ERROR; + } + } + numMatchesSaved = 0; } else { switch (mode) { - case OPT_EXACT: - matched = (strcmp(Tcl_GetString(stringObj), pattern) == 0); - break; - case OPT_GLOB: - matched = Tcl_StringMatch(Tcl_GetString(stringObj), - pattern); - break; - case OPT_REGEXP: - matched = Tcl_RegExpMatchObj(interp, stringObj, objv[i]); - if (matched < 0) { - return TCL_ERROR; - } - break; + case OPT_EXACT: + matched = (strcmp(Tcl_GetString(stringObj), pattern) == 0); + break; + case OPT_GLOB: + matched = Tcl_StringMatch(Tcl_GetString(stringObj), pattern); + break; + case OPT_REGEXP: + regExpr = Tcl_GetRegExpFromObj(interp, objv[i], + TCL_REG_ADVANCED); + if (regExpr == NULL) { + return TCL_ERROR; + } + matched = Tcl_RegExpExecObj(interp, regExpr, stringObj, 0, + numMatchesSaved, 0); + if (matched < 0) { + return TCL_ERROR; + } + break; } } if (matched == 0) { @@ -2705,6 +2783,81 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) } /* + * We are operating in REGEXP mode and we need to store + * information about what we matched in some user-nominated + * arrays. So build the lists of values and indices to write + * here. [TIP#75] + */ + + if (numMatchesSaved) { + Tcl_RegExpInfo info; + Tcl_Obj *matchesObj, *indicesObj; + + Tcl_RegExpGetInfo(regExpr, &info); + if (matchVarObj != NULL) { + TclNewObj(matchesObj); + } else { + matchesObj = NULL; + } + if (indexVarObj != NULL) { + TclNewObj(indicesObj); + } + for (j=0 ; j<=info.nsubs ; j++) { + if (indexVarObj != NULL) { + Tcl_Obj *rangeObjAry[2]; + + rangeObjAry[0] = Tcl_NewLongObj(info.matches[j].start); + rangeObjAry[1] = Tcl_NewLongObj(info.matches[j].end); + /* + * Never fails; the object is always clean at this point. + */ + Tcl_ListObjAppendElement(NULL, indicesObj, + Tcl_NewListObj(2, rangeObjAry)); + } + if (matchVarObj != NULL) { + Tcl_Obj *substringObj; + + substringObj = Tcl_GetRange(stringObj, + info.matches[j].start, info.matches[j].end-1); + /* + * Never fails; the object is always clean at this point. + */ + Tcl_ListObjAppendElement(NULL, matchesObj, substringObj); + } + } + if (indexVarObj != NULL) { + if (Tcl_ObjSetVar2(interp, indexVarObj, NULL, indicesObj, + TCL_LEAVE_ERR_MSG) == NULL) { + Tcl_DecrRefCount(indicesObj); + /* + * Careful! Check to see if we have allocated the + * list of matched strings; if so (but there was + * an error assigning the indices list) we have a + * potential memory leak because the match list + * has not been written to a variable. Except + * that we'll clean that up right now. + */ + if (matchesObj != NULL) { + Tcl_DecrRefCount(matchesObj); + } + return TCL_ERROR; + } + } + if (matchVarObj != NULL) { + if (Tcl_ObjSetVar2(interp, matchVarObj, NULL, matchesObj, + TCL_LEAVE_ERR_MSG) == NULL) { + Tcl_DecrRefCount(matchesObj); + /* + * Unlike above, if indicesObj is non-NULL at this + * point, it will have been written to a variable + * already and will hence not be leaked. + */ + return TCL_ERROR; + } + } + } + + /* * We've got a match. Find a body to execute, skipping bodies * that are "-". */ diff --git a/tests/switch.test b/tests/switch.test index cfc80ec..3218e3a 100644 --- a/tests/switch.test +++ b/tests/switch.test @@ -11,7 +11,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: switch.test,v 1.9 2003/04/28 10:05:28 dkf Exp $ +# RCS: @(#) $Id: switch.test,v 1.10 2003/12/14 18:32:36 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -89,7 +89,7 @@ test switch-3.5 {-exact vs. -glob vs. -regexp} { } exact test switch-3.6 {-exact vs. -glob vs. -regexp} { list [catch {switch -foo a b c} msg] $msg -} {1 {bad option "-foo": must be -exact, -glob, -regexp, or --}} +} {1 {bad option "-foo": must be -exact, -glob, -indexvar, -matchvar, -regexp, or --}} test switch-4.1 {error in executed command} { list [catch {switch a a {error "Just a test"} default {format 1}} msg] \ @@ -360,6 +360,99 @@ rename iswtest-exact {} rename cswtest2-exact {} rename iswtest2-exact {} +# Added due to TIP#75 +test switch-11.1 {regexp matching with -matchvar} { + switch -regexp -matchvar x -- abc {.(.). {set x}} +} {abc b} +test switch-11.2 {regexp matching with -matchvar} { + set x GOOD + switch -regexp -matchvar x -- abc {.(.).. {list $x z}} + set x +} GOOD +test switch-11.3 {regexp matching with -matchvar} { + switch -regexp -matchvar x -- "a b c" {.(.). {set x}} +} {{a b} { }} +test switch-11.4 {regexp matching with -matchvar} { + set x BAD + switch -regexp -matchvar x -- "a b c" { + bc {list $x YES} + default {list $x NO} + } +} {{} NO} +test switch-11.5 {-matchvar without -regexp} { + set x {} + list [catch {switch -glob -matchvar x -- abc . {set x}} msg] $x $msg +} {1 {} {-matchvar option requires -regexp option}} +test switch-11.6 {-matchvar unwritable} { + set x {} + list [catch {switch -regexp -matchvar x(x) -- abc . {set x}} msg] $x $msg +} {1 {} {can't set "x(x)": variable isn't array}} + +test switch-12.1 {regexp matching with -indexvar} { + switch -regexp -indexvar x -- abc {.(.). {set x}} +} {{0 3} {1 2}} +test switch-12.2 {regexp matching with -indexvar} { + set x GOOD + switch -regexp -indexvar x -- abc {.(.).. {list $x z}} + set x +} GOOD +test switch-12.3 {regexp matching with -indexvar} { + switch -regexp -indexvar x -- "a b c" {.(.). {set x}} +} {{0 3} {1 2}} +test switch-12.4 {regexp matching with -indexvar} { + set x BAD + switch -regexp -indexvar x -- "a b c" { + bc {list $x YES} + default {list $x NO} + } +} {{} NO} +test switch-12.5 {-indexvar without -regexp} { + set x {} + list [catch {switch -glob -indexvar x -- abc . {set x}} msg] $x $msg +} {1 {} {-indexvar option requires -regexp option}} +test switch-12.6 {-indexvar unwritable} { + set x {} + list [catch {switch -regexp -indexvar x(x) -- abc . {set x}} msg] $x $msg +} {1 {} {can't set "x(x)": variable isn't array}} + +test switch-13.1 {-indexvar -matchvar combinations} { + switch -regexp -indexvar x -matchvar y abc { + . {list $x $y} + } +} {{{0 1}} a} +test switch-13.2 {-indexvar -matchvar combinations} { + switch -regexp -indexvar x -matchvar y abc { + .$ {list $x $y} + } +} {{{2 3}} c} +test switch-13.3 {-indexvar -matchvar combinations} { + switch -regexp -indexvar x -matchvar y abc { + (.)(.)(.) {list $x $y} + } +} {{{0 3} {0 1} {1 2} {2 3}} {abc a b c}} +test switch-13.4 {-indexvar -matchvar combinations} { + set x - + set y - + switch -regexp -indexvar x -matchvar y abc { + (.)(.)(.). - + default {list $x $y} + } +} {{} {}} +test switch-13.5 {-indexvar -matchvar combinations} { + set x - + set y - + list [catch { + switch -regexp -indexvar x(x) -matchvar y abc {. {list $x $y}} + } msg] $x $y $msg +} {1 - - {can't set "x(x)": variable isn't array}} +test switch-13.6 {-indexvar -matchvar combinations} { + set x - + set y - + list [catch { + switch -regexp -indexvar x -matchvar y(y) abc {. {list $x $y}} + } msg] $x $y $msg +} {1 {{0 1}} - {can't set "y(y)": variable isn't array}} + # cleanup ::tcltest::cleanupTests return -- cgit v0.12