summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2017-02-11 21:36:36 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2017-02-11 21:36:36 (GMT)
commit293d30f0a2a4756db591e83e93eae24681e0ec7a (patch)
treee58a24b15db6580a63f08ba1979616861a7c5cbb
parent089e88af6f855e42658be530688791a814c26e91 (diff)
downloadtcl-293d30f0a2a4756db591e83e93eae24681e0ec7a.zip
tcl-293d30f0a2a4756db591e83e93eae24681e0ec7a.tar.gz
tcl-293d30f0a2a4756db591e83e93eae24681e0ec7a.tar.bz2
Proposed implementation of [regsub -command].
-rw-r--r--generic/tclCmdMZ.c82
-rw-r--r--tests/regexp.test22
-rw-r--r--tests/regexpComp.test2
3 files changed, 98 insertions, 8 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index ba1fc41..ffae8b2 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -487,26 +487,28 @@ Tcl_RegsubObjCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int idx, result, cflags, all, wlen, wsublen, numMatches, offset;
- int start, end, subStart, subEnd, match;
+ int start, end, subStart, subEnd, match, command;
Tcl_RegExp regExpr;
Tcl_RegExpInfo info;
Tcl_Obj *resultPtr, *subPtr, *objPtr, *startIndex = NULL;
+ Tcl_Obj **args = NULL, *parts[2], *cmdObj;
Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec, *wend;
static const char *const options[] = {
- "-all", "-nocase", "-expanded",
- "-line", "-linestop", "-lineanchor", "-start",
+ "-all", "-command", "-expanded", "-line",
+ "-linestop", "-lineanchor", "-nocase", "-start",
"--", NULL
};
enum options {
- REGSUB_ALL, REGSUB_NOCASE, REGSUB_EXPANDED,
- REGSUB_LINE, REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_START,
+ REGSUB_ALL, REGSUB_COMMAND, REGSUB_EXPANDED, REGSUB_LINE,
+ REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_NOCASE, REGSUB_START,
REGSUB_LAST
};
cflags = TCL_REG_ADVANCED;
all = 0;
offset = 0;
+ command = 0;
resultPtr = NULL;
for (idx = 1; idx < objc; idx++) {
@@ -528,6 +530,9 @@ Tcl_RegsubObjCmd(
case REGSUB_NOCASE:
cflags |= TCL_REG_NOCASE;
break;
+ case REGSUB_COMMAND:
+ command = 1;
+ break;
case REGSUB_EXPANDED:
cflags |= TCL_REG_EXPANDED;
break;
@@ -585,7 +590,7 @@ Tcl_RegsubObjCmd(
}
}
- if (all && (offset == 0)
+ if (all && (offset == 0) && (command == 0)
&& (strpbrk(TclGetString(objv[2]), "&\\") == NULL)
&& (strpbrk(TclGetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) {
/*
@@ -737,6 +742,68 @@ Tcl_RegsubObjCmd(
Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start);
/*
+ * In -command mode, the substitutions are added as quoted arguments
+ * to the subSpec to form a command, that is then executed and the
+ * result used as the string to substitute in.
+ */
+
+ if (command) {
+ if (args == NULL) {
+ args = ckalloc(sizeof(Tcl_Obj*) * (info.nsubs + 1));
+ }
+
+ for (idx = 0 ; idx <= info.nsubs ; idx++) {
+ subStart = info.matches[idx].start;
+ subEnd = info.matches[idx].end;
+ if ((subStart >= 0) && (subEnd >= 0)) {
+ args[idx] = Tcl_NewUnicodeObj(
+ wstring + offset + subStart, subEnd - subStart);
+ } else {
+ args[idx] = Tcl_NewObj();
+ }
+ }
+ parts[0] = subPtr;
+ parts[1] = Tcl_NewListObj(info.nsubs+1, args);
+ cmdObj = Tcl_ConcatObj(2, parts);
+ Tcl_IncrRefCount(cmdObj);
+ Tcl_DecrRefCount(parts[1]);
+
+ result = Tcl_EvalObjEx(interp, cmdObj, TCL_EVAL_DIRECT);
+ Tcl_DecrRefCount(cmdObj);
+ if (result != TCL_OK) {
+ if (result == TCL_ERROR) {
+ Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf(
+ "\n (%s substitution computation script)",
+ options[REGSUB_COMMAND]));
+ }
+ goto done;
+ }
+
+ Tcl_AppendObjToObj(resultPtr, Tcl_GetObjResult(interp));
+ Tcl_ResetResult(interp);
+
+ offset += end;
+ if (end == 0 || start == end) {
+ /*
+ * Always consume at least one character of the input string
+ * in order to prevent infinite loops, even when we
+ * technically matched the empty string; we must not match
+ * again at the same spot.
+ */
+
+ if (offset < wlen) {
+ Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ }
+ offset++;
+ }
+ if (all) {
+ continue;
+ } else {
+ break;
+ }
+ }
+
+ /*
* Append the subSpec argument to the variable, making appropriate
* substitutions. This code is a bit hairy because of the backslash
* conventions and because the code saves up ranges of characters in
@@ -864,6 +931,9 @@ Tcl_RegsubObjCmd(
if (subPtr && (objv[2] == objv[0])) {
Tcl_DecrRefCount(subPtr);
}
+ if (args) {
+ ckfree(args);
+ }
if (resultPtr) {
Tcl_DecrRefCount(resultPtr);
}
diff --git a/tests/regexp.test b/tests/regexp.test
index 4ffdbdb..6c77b41 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -453,7 +453,7 @@ test regexp-11.4 {regsub errors} {
} {1 {wrong # args: should be "regsub ?-option ...? exp string subSpec ?varName?"}}
test regexp-11.5 {regsub errors} {
list [catch {regsub -gorp a b c} msg] $msg
-} {1 {bad option "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}}
+} {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -linestop, -lineanchor, -nocase, -start, or --}}
test regexp-11.6 {regsub errors} {
list [catch {regsub -nocase a( b c d} msg] $msg
} {1 {couldn't compile regular expression pattern: parentheses () not balanced}}
@@ -1123,6 +1123,26 @@ test regexp-26.12 {regexp with -line option} {
test regexp-26.13 {regexp without -line option} {
regexp -all -inline -- {a*} "b\n"
} {{} {}}
+
+test regexp-27.1 {regsub -command} {
+ regsub -command {.x.} {abcxdef} {string length}
+} ab3ef
+test regexp-27.2 {regsub -command} {
+ regsub -command {.x.} {abcxdefxghi} {string length}
+} ab3efxghi
+test regexp-27.3 {regsub -command} {
+ set x 0
+ regsub -all -command {(?=.)} abcde "incr x;#"
+} 1a2b3c4d5e
+test regexp-27.4 {regsub -command} -body {
+ regsub -command {.x.} {abcxdef} error
+} -returnCodes error -result cxd
+test regexp-27.5 {regsub -command} {
+ regsub -command {(.)(.)} {abcdef} {list ,}
+} {, ab a bcdef}
+test regexp-27.6 {regsub -command} {
+ regsub -command -all {(.)(.)} {abcdef} {list ,}
+} {, ab a b, cd c d, ef e f}
# cleanup
::tcltest::cleanupTests
diff --git a/tests/regexpComp.test b/tests/regexpComp.test
index b8e64b6..fbf8012 100644
--- a/tests/regexpComp.test
+++ b/tests/regexpComp.test
@@ -587,7 +587,7 @@ test regexpComp-11.5 {regsub errors} {
evalInProc {
list [catch {regsub -gorp a b c} msg] $msg
}
-} {1 {bad option "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}}
+} {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -linestop, -lineanchor, -nocase, -start, or --}}
test regexpComp-11.6 {regsub errors} {
evalInProc {
list [catch {regsub -nocase a( b c d} msg] $msg