summaryrefslogtreecommitdiffstats
path: root/generic/tclCmdMZ.c
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2017-06-22 21:48:48 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2017-06-22 21:48:48 (GMT)
commit150abed7cc47a2f1010df4700282f419d56a8a9f (patch)
treecdd8ab22151df5befc6b2793a5944e30e26fc38a /generic/tclCmdMZ.c
parent84481c3d32d19d3c3d8bdc97d6b378fb9665ced7 (diff)
parentf5cf6bbf990d8bb8c07e986c9f67c94f75c878ff (diff)
downloadtcl-150abed7cc47a2f1010df4700282f419d56a8a9f.zip
tcl-150abed7cc47a2f1010df4700282f419d56a8a9f.tar.gz
tcl-150abed7cc47a2f1010df4700282f419d56a8a9f.tar.bz2
merge trunktip_470
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r--generic/tclCmdMZ.c130
1 files changed, 122 insertions, 8 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index ba1fc41..2c6b7bb 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -487,26 +487,28 @@ Tcl_RegsubObjCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int idx, result, cflags, all, wlen, wsublen, numMatches, offset;
- int start, end, subStart, subEnd, match;
+ int start, end, subStart, subEnd, match, command, numParts, numArgs;
Tcl_RegExp regExpr;
Tcl_RegExpInfo info;
Tcl_Obj *resultPtr, *subPtr, *objPtr, *startIndex = NULL;
+ Tcl_Obj **args = NULL, **parts;
Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec, *wend;
static const char *const options[] = {
- "-all", "-nocase", "-expanded",
- "-line", "-linestop", "-lineanchor", "-start",
+ "-all", "-command", "-expanded", "-line",
+ "-linestop", "-lineanchor", "-nocase", "-start",
"--", NULL
};
enum options {
- REGSUB_ALL, REGSUB_NOCASE, REGSUB_EXPANDED,
- REGSUB_LINE, REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_START,
+ REGSUB_ALL, REGSUB_COMMAND, REGSUB_EXPANDED, REGSUB_LINE,
+ REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_NOCASE, REGSUB_START,
REGSUB_LAST
};
cflags = TCL_REG_ADVANCED;
all = 0;
offset = 0;
+ command = 0;
resultPtr = NULL;
for (idx = 1; idx < objc; idx++) {
@@ -528,6 +530,9 @@ Tcl_RegsubObjCmd(
case REGSUB_NOCASE:
cflags |= TCL_REG_NOCASE;
break;
+ case REGSUB_COMMAND:
+ command = 1;
+ break;
case REGSUB_EXPANDED:
cflags |= TCL_REG_EXPANDED;
break;
@@ -585,7 +590,7 @@ Tcl_RegsubObjCmd(
}
}
- if (all && (offset == 0)
+ if (all && (offset == 0) && (command == 0)
&& (strpbrk(TclGetString(objv[2]), "&\\") == NULL)
&& (strpbrk(TclGetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) {
/*
@@ -661,6 +666,28 @@ Tcl_RegsubObjCmd(
return TCL_ERROR;
}
+ if (command) {
+ /*
+ * In command-prefix mode, we require that the third non-option
+ * argument be a list, so we enforce that here. Afterwards, we fetch
+ * the RE compilation again in case objv[0] and objv[2] are the same
+ * object. (If they aren't, that's cheap to do.)
+ */
+
+ if (Tcl_ListObjLength(interp, objv[2], &numParts) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (numParts < 1) {
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(
+ "command prefix must be a list of at least one element",
+ -1));
+ Tcl_SetErrorCode(interp, "TCL", "OPERATION", "REGSUB",
+ "CMDEMPTY", NULL);
+ return TCL_ERROR;
+ }
+ regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
+ }
+
/*
* Make sure to avoid problems where the objects are shared. This can
* cause RegExpObj <> UnicodeObj shimmering that causes data corruption.
@@ -678,7 +705,9 @@ Tcl_RegsubObjCmd(
} else {
subPtr = objv[2];
}
- wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen);
+ if (!command) {
+ wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen);
+ }
result = TCL_OK;
@@ -737,6 +766,88 @@ Tcl_RegsubObjCmd(
Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start);
/*
+ * In command-prefix mode, the substitutions are added as quoted
+ * arguments to the subSpec to form a command, that is then executed
+ * and the result used as the string to substitute in. Actually,
+ * everything is passed through Tcl_EvalObjv, as that's much faster.
+ */
+
+ if (command) {
+ if (args == NULL) {
+ Tcl_ListObjGetElements(interp, subPtr, &numParts, &parts);
+ numArgs = numParts + info.nsubs + 1;
+ args = ckalloc(sizeof(Tcl_Obj*) * numArgs);
+ memcpy(args, parts, sizeof(Tcl_Obj*) * numParts);
+ }
+
+ for (idx = 0 ; idx <= info.nsubs ; idx++) {
+ subStart = info.matches[idx].start;
+ subEnd = info.matches[idx].end;
+ if ((subStart >= 0) && (subEnd >= 0)) {
+ args[idx + numParts] = Tcl_NewUnicodeObj(
+ wstring + offset + subStart, subEnd - subStart);
+ } else {
+ args[idx + numParts] = Tcl_NewObj();
+ }
+ Tcl_IncrRefCount(args[idx + numParts]);
+ }
+
+ /*
+ * At this point, we're locally holding the references to the
+ * argument words we added for this time round the loop, and the
+ * subPtr is holding the references to the words that the user
+ * supplied directly. None are zero-refcount, which is important
+ * because Tcl_EvalObjv is "hairy monster" in terms of refcount
+ * handling, being able to optionally add references to any of its
+ * argument words. We'll drop the local refs immediately
+ * afterwarsds; subPtr is handled in the main exit stanza.
+ */
+
+ result = Tcl_EvalObjv(interp, numArgs, args, 0);
+ for (idx = 0 ; idx <= info.nsubs ; idx++) {
+ TclDecrRefCount(args[idx + numParts]);
+ }
+ if (result != TCL_OK) {
+ if (result == TCL_ERROR) {
+ Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf(
+ "\n (%s substitution computation script)",
+ options[REGSUB_COMMAND]));
+ }
+ goto done;
+ }
+
+ Tcl_AppendObjToObj(resultPtr, Tcl_GetObjResult(interp));
+ Tcl_ResetResult(interp);
+
+ /*
+ * Refetch the unicode, in case the representation was smashed by
+ * the user code.
+ */
+
+ wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen);
+
+ offset += end;
+ if (end == 0 || start == end) {
+ /*
+ * Always consume at least one character of the input string
+ * in order to prevent infinite loops, even when we
+ * technically matched the empty string; we must not match
+ * again at the same spot.
+ */
+
+ if (offset < wlen) {
+ Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ }
+ offset++;
+ }
+ if (all) {
+ continue;
+ } else {
+ break;
+ }
+ }
+
+ /*
* Append the subSpec argument to the variable, making appropriate
* substitutions. This code is a bit hairy because of the backslash
* conventions and because the code saves up ranges of characters in
@@ -864,6 +975,9 @@ Tcl_RegsubObjCmd(
if (subPtr && (objv[2] == objv[0])) {
Tcl_DecrRefCount(subPtr);
}
+ if (args) {
+ ckfree(args);
+ }
if (resultPtr) {
Tcl_DecrRefCount(resultPtr);
}
@@ -1565,7 +1679,7 @@ StringIsCmd(
}
break;
case STR_IS_WIDE:
- if (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w)) {
+ if (TCL_OK == TclGetWideIntFromObj(NULL, objPtr, &w)) {
break;
}