diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2017-06-22 21:48:48 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2017-06-22 21:48:48 (GMT) |
commit | 150abed7cc47a2f1010df4700282f419d56a8a9f (patch) | |
tree | cdd8ab22151df5befc6b2793a5944e30e26fc38a /generic/tclCmdMZ.c | |
parent | 84481c3d32d19d3c3d8bdc97d6b378fb9665ced7 (diff) | |
parent | f5cf6bbf990d8bb8c07e986c9f67c94f75c878ff (diff) | |
download | tcl-150abed7cc47a2f1010df4700282f419d56a8a9f.zip tcl-150abed7cc47a2f1010df4700282f419d56a8a9f.tar.gz tcl-150abed7cc47a2f1010df4700282f419d56a8a9f.tar.bz2 |
merge trunktip_470
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r-- | generic/tclCmdMZ.c | 130 |
1 files changed, 122 insertions, 8 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index ba1fc41..2c6b7bb 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -487,26 +487,28 @@ Tcl_RegsubObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int idx, result, cflags, all, wlen, wsublen, numMatches, offset; - int start, end, subStart, subEnd, match; + int start, end, subStart, subEnd, match, command, numParts, numArgs; Tcl_RegExp regExpr; Tcl_RegExpInfo info; Tcl_Obj *resultPtr, *subPtr, *objPtr, *startIndex = NULL; + Tcl_Obj **args = NULL, **parts; Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec, *wend; static const char *const options[] = { - "-all", "-nocase", "-expanded", - "-line", "-linestop", "-lineanchor", "-start", + "-all", "-command", "-expanded", "-line", + "-linestop", "-lineanchor", "-nocase", "-start", "--", NULL }; enum options { - REGSUB_ALL, REGSUB_NOCASE, REGSUB_EXPANDED, - REGSUB_LINE, REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_START, + REGSUB_ALL, REGSUB_COMMAND, REGSUB_EXPANDED, REGSUB_LINE, + REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_NOCASE, REGSUB_START, REGSUB_LAST }; cflags = TCL_REG_ADVANCED; all = 0; offset = 0; + command = 0; resultPtr = NULL; for (idx = 1; idx < objc; idx++) { @@ -528,6 +530,9 @@ Tcl_RegsubObjCmd( case REGSUB_NOCASE: cflags |= TCL_REG_NOCASE; break; + case REGSUB_COMMAND: + command = 1; + break; case REGSUB_EXPANDED: cflags |= TCL_REG_EXPANDED; break; @@ -585,7 +590,7 @@ Tcl_RegsubObjCmd( } } - if (all && (offset == 0) + if (all && (offset == 0) && (command == 0) && (strpbrk(TclGetString(objv[2]), "&\\") == NULL) && (strpbrk(TclGetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) { /* @@ -661,6 +666,28 @@ Tcl_RegsubObjCmd( return TCL_ERROR; } + if (command) { + /* + * In command-prefix mode, we require that the third non-option + * argument be a list, so we enforce that here. Afterwards, we fetch + * the RE compilation again in case objv[0] and objv[2] are the same + * object. (If they aren't, that's cheap to do.) + */ + + if (Tcl_ListObjLength(interp, objv[2], &numParts) != TCL_OK) { + return TCL_ERROR; + } + if (numParts < 1) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "command prefix must be a list of at least one element", + -1)); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "REGSUB", + "CMDEMPTY", NULL); + return TCL_ERROR; + } + regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); + } + /* * Make sure to avoid problems where the objects are shared. This can * cause RegExpObj <> UnicodeObj shimmering that causes data corruption. @@ -678,7 +705,9 @@ Tcl_RegsubObjCmd( } else { subPtr = objv[2]; } - wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); + if (!command) { + wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); + } result = TCL_OK; @@ -737,6 +766,88 @@ Tcl_RegsubObjCmd( Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); /* + * In command-prefix mode, the substitutions are added as quoted + * arguments to the subSpec to form a command, that is then executed + * and the result used as the string to substitute in. Actually, + * everything is passed through Tcl_EvalObjv, as that's much faster. + */ + + if (command) { + if (args == NULL) { + Tcl_ListObjGetElements(interp, subPtr, &numParts, &parts); + numArgs = numParts + info.nsubs + 1; + args = ckalloc(sizeof(Tcl_Obj*) * numArgs); + memcpy(args, parts, sizeof(Tcl_Obj*) * numParts); + } + + for (idx = 0 ; idx <= info.nsubs ; idx++) { + subStart = info.matches[idx].start; + subEnd = info.matches[idx].end; + if ((subStart >= 0) && (subEnd >= 0)) { + args[idx + numParts] = Tcl_NewUnicodeObj( + wstring + offset + subStart, subEnd - subStart); + } else { + args[idx + numParts] = Tcl_NewObj(); + } + Tcl_IncrRefCount(args[idx + numParts]); + } + + /* + * At this point, we're locally holding the references to the + * argument words we added for this time round the loop, and the + * subPtr is holding the references to the words that the user + * supplied directly. None are zero-refcount, which is important + * because Tcl_EvalObjv is "hairy monster" in terms of refcount + * handling, being able to optionally add references to any of its + * argument words. We'll drop the local refs immediately + * afterwarsds; subPtr is handled in the main exit stanza. + */ + + result = Tcl_EvalObjv(interp, numArgs, args, 0); + for (idx = 0 ; idx <= info.nsubs ; idx++) { + TclDecrRefCount(args[idx + numParts]); + } + if (result != TCL_OK) { + if (result == TCL_ERROR) { + Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( + "\n (%s substitution computation script)", + options[REGSUB_COMMAND])); + } + goto done; + } + + Tcl_AppendObjToObj(resultPtr, Tcl_GetObjResult(interp)); + Tcl_ResetResult(interp); + + /* + * Refetch the unicode, in case the representation was smashed by + * the user code. + */ + + wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + + offset += end; + if (end == 0 || start == end) { + /* + * Always consume at least one character of the input string + * in order to prevent infinite loops, even when we + * technically matched the empty string; we must not match + * again at the same spot. + */ + + if (offset < wlen) { + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + } + offset++; + } + if (all) { + continue; + } else { + break; + } + } + + /* * Append the subSpec argument to the variable, making appropriate * substitutions. This code is a bit hairy because of the backslash * conventions and because the code saves up ranges of characters in @@ -864,6 +975,9 @@ Tcl_RegsubObjCmd( if (subPtr && (objv[2] == objv[0])) { Tcl_DecrRefCount(subPtr); } + if (args) { + ckfree(args); + } if (resultPtr) { Tcl_DecrRefCount(resultPtr); } @@ -1565,7 +1679,7 @@ StringIsCmd( } break; case STR_IS_WIDE: - if (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w)) { + if (TCL_OK == TclGetWideIntFromObj(NULL, objPtr, &w)) { break; } |