From 293d30f0a2a4756db591e83e93eae24681e0ec7a Mon Sep 17 00:00:00 2001 From: dkf Date: Sat, 11 Feb 2017 21:36:36 +0000 Subject: Proposed implementation of [regsub -command]. --- generic/tclCmdMZ.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++---- tests/regexp.test | 22 +++++++++++++- tests/regexpComp.test | 2 +- 3 files changed, 98 insertions(+), 8 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index ba1fc41..ffae8b2 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -487,26 +487,28 @@ Tcl_RegsubObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int idx, result, cflags, all, wlen, wsublen, numMatches, offset; - int start, end, subStart, subEnd, match; + int start, end, subStart, subEnd, match, command; Tcl_RegExp regExpr; Tcl_RegExpInfo info; Tcl_Obj *resultPtr, *subPtr, *objPtr, *startIndex = NULL; + Tcl_Obj **args = NULL, *parts[2], *cmdObj; Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec, *wend; static const char *const options[] = { - "-all", "-nocase", "-expanded", - "-line", "-linestop", "-lineanchor", "-start", + "-all", "-command", "-expanded", "-line", + "-linestop", "-lineanchor", "-nocase", "-start", "--", NULL }; enum options { - REGSUB_ALL, REGSUB_NOCASE, REGSUB_EXPANDED, - REGSUB_LINE, REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_START, + REGSUB_ALL, REGSUB_COMMAND, REGSUB_EXPANDED, REGSUB_LINE, + REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_NOCASE, REGSUB_START, REGSUB_LAST }; cflags = TCL_REG_ADVANCED; all = 0; offset = 0; + command = 0; resultPtr = NULL; for (idx = 1; idx < objc; idx++) { @@ -528,6 +530,9 @@ Tcl_RegsubObjCmd( case REGSUB_NOCASE: cflags |= TCL_REG_NOCASE; break; + case REGSUB_COMMAND: + command = 1; + break; case REGSUB_EXPANDED: cflags |= TCL_REG_EXPANDED; break; @@ -585,7 +590,7 @@ Tcl_RegsubObjCmd( } } - if (all && (offset == 0) + if (all && (offset == 0) && (command == 0) && (strpbrk(TclGetString(objv[2]), "&\\") == NULL) && (strpbrk(TclGetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) { /* @@ -737,6 +742,68 @@ Tcl_RegsubObjCmd( Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); /* + * In -command mode, the substitutions are added as quoted arguments + * to the subSpec to form a command, that is then executed and the + * result used as the string to substitute in. + */ + + if (command) { + if (args == NULL) { + args = ckalloc(sizeof(Tcl_Obj*) * (info.nsubs + 1)); + } + + for (idx = 0 ; idx <= info.nsubs ; idx++) { + subStart = info.matches[idx].start; + subEnd = info.matches[idx].end; + if ((subStart >= 0) && (subEnd >= 0)) { + args[idx] = Tcl_NewUnicodeObj( + wstring + offset + subStart, subEnd - subStart); + } else { + args[idx] = Tcl_NewObj(); + } + } + parts[0] = subPtr; + parts[1] = Tcl_NewListObj(info.nsubs+1, args); + cmdObj = Tcl_ConcatObj(2, parts); + Tcl_IncrRefCount(cmdObj); + Tcl_DecrRefCount(parts[1]); + + result = Tcl_EvalObjEx(interp, cmdObj, TCL_EVAL_DIRECT); + Tcl_DecrRefCount(cmdObj); + if (result != TCL_OK) { + if (result == TCL_ERROR) { + Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( + "\n (%s substitution computation script)", + options[REGSUB_COMMAND])); + } + goto done; + } + + Tcl_AppendObjToObj(resultPtr, Tcl_GetObjResult(interp)); + Tcl_ResetResult(interp); + + offset += end; + if (end == 0 || start == end) { + /* + * Always consume at least one character of the input string + * in order to prevent infinite loops, even when we + * technically matched the empty string; we must not match + * again at the same spot. + */ + + if (offset < wlen) { + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + } + offset++; + } + if (all) { + continue; + } else { + break; + } + } + + /* * Append the subSpec argument to the variable, making appropriate * substitutions. This code is a bit hairy because of the backslash * conventions and because the code saves up ranges of characters in @@ -864,6 +931,9 @@ Tcl_RegsubObjCmd( if (subPtr && (objv[2] == objv[0])) { Tcl_DecrRefCount(subPtr); } + if (args) { + ckfree(args); + } if (resultPtr) { Tcl_DecrRefCount(resultPtr); } diff --git a/tests/regexp.test b/tests/regexp.test index 4ffdbdb..6c77b41 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -453,7 +453,7 @@ test regexp-11.4 {regsub errors} { } {1 {wrong # args: should be "regsub ?-option ...? exp string subSpec ?varName?"}} test regexp-11.5 {regsub errors} { list [catch {regsub -gorp a b c} msg] $msg -} {1 {bad option "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}} +} {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -linestop, -lineanchor, -nocase, -start, or --}} test regexp-11.6 {regsub errors} { list [catch {regsub -nocase a( b c d} msg] $msg } {1 {couldn't compile regular expression pattern: parentheses () not balanced}} @@ -1123,6 +1123,26 @@ test regexp-26.12 {regexp with -line option} { test regexp-26.13 {regexp without -line option} { regexp -all -inline -- {a*} "b\n" } {{} {}} + +test regexp-27.1 {regsub -command} { + regsub -command {.x.} {abcxdef} {string length} +} ab3ef +test regexp-27.2 {regsub -command} { + regsub -command {.x.} {abcxdefxghi} {string length} +} ab3efxghi +test regexp-27.3 {regsub -command} { + set x 0 + regsub -all -command {(?=.)} abcde "incr x;#" +} 1a2b3c4d5e +test regexp-27.4 {regsub -command} -body { + regsub -command {.x.} {abcxdef} error +} -returnCodes error -result cxd +test regexp-27.5 {regsub -command} { + regsub -command {(.)(.)} {abcdef} {list ,} +} {, ab a bcdef} +test regexp-27.6 {regsub -command} { + regsub -command -all {(.)(.)} {abcdef} {list ,} +} {, ab a b, cd c d, ef e f} # cleanup ::tcltest::cleanupTests diff --git a/tests/regexpComp.test b/tests/regexpComp.test index b8e64b6..fbf8012 100644 --- a/tests/regexpComp.test +++ b/tests/regexpComp.test @@ -587,7 +587,7 @@ test regexpComp-11.5 {regsub errors} { evalInProc { list [catch {regsub -gorp a b c} msg] $msg } -} {1 {bad option "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}} +} {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -linestop, -lineanchor, -nocase, -start, or --}} test regexpComp-11.6 {regsub errors} { evalInProc { list [catch {regsub -nocase a( b c d} msg] $msg -- cgit v0.12