From a0eff2e10d8d67a7d2a9ed66aec116ebf483dfc8 Mon Sep 17 00:00:00 2001 From: dkf Date: Tue, 30 Oct 2012 12:39:40 +0000 Subject: Added compilation of [regsub] (in the simplest, most restricted case). --- generic/tclBasic.c | 2 +- generic/tclCompCmds.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++ generic/tclInt.h | 3 + 3 files changed, 178 insertions(+), 1 deletion(-) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index 1e07161..6e60aee 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -240,7 +240,7 @@ static const CmdInfo builtInCmds[] = { {"package", Tcl_PackageObjCmd, NULL, NULL, 1}, {"proc", Tcl_ProcObjCmd, NULL, NULL, 1}, {"regexp", Tcl_RegexpObjCmd, TclCompileRegexpCmd, NULL, 1}, - {"regsub", Tcl_RegsubObjCmd, NULL, NULL, 1}, + {"regsub", Tcl_RegsubObjCmd, TclCompileRegsubCmd, NULL, 1}, {"rename", Tcl_RenameObjCmd, NULL, NULL, 1}, {"return", Tcl_ReturnObjCmd, TclCompileReturnCmd, NULL, 1}, {"scan", Tcl_ScanObjCmd, NULL, NULL, 1}, diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c index 777d66b..029606e 100644 --- a/generic/tclCompCmds.c +++ b/generic/tclCompCmds.c @@ -4644,6 +4644,180 @@ TclCompileRegexpCmd( /* *---------------------------------------------------------------------- * + * TclCompileRegsubCmd -- + * + * Procedure called to compile the "regsub" command. + * + * Results: + * Returns TCL_OK for a successful compile. Returns TCL_ERROR to defer + * evaluation to runtime. + * + * Side effects: + * Instructions are added to envPtr to execute the "regsub" command at + * runtime. + * + *---------------------------------------------------------------------- + */ + +int +TclCompileRegsubCmd( + Tcl_Interp *interp, /* Tcl interpreter for error reporting. */ + Tcl_Parse *parsePtr, /* Points to a parse structure for the + * command. */ + Command *cmdPtr, /* Points to defintion of command being + * compiled. */ + CompileEnv *envPtr) /* Holds the resulting instructions. */ +{ + /* + * We only compile the case with [regsub -all] where the pattern is both + * known at compile time and simple (i.e., no RE metacharacters). That is, + * the pattern must be translatable into a glob like "*foo*" with no other + * glob metacharacters inside it; there must be some "foo" in there too. + * The substitution string must also be known at compile time and free of + * metacharacters ("\digit" and "&"). Finally, there must not be a + * variable mentioned in the [regsub] to write the result back to (because + * we can't get the count of substitutions that would be the result in + * that case). The key is that these are the conditions under which a + * [string map] could be used instead, in particular a [string map] of the + * form we can compile to bytecode. + * + * In short, we look for: + * + * regsub -all [--] simpleRE string simpleReplacement + * + * The only optional part is the "--", and no other options are handled. + */ + + DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr, *stringTokenPtr; + Tcl_Obj *patternObj = NULL, *replacementObj = NULL; + Tcl_DString pattern; + const char *bytes; + int len, exact, result = TCL_ERROR; + + if (parsePtr->numWords < 5 || parsePtr->numWords > 6) { + return TCL_ERROR; + } + + /* + * Parse the "-all", which must be the first argument (other options not + * supported, non-"-all" substitution we can't compile). + */ + + tokenPtr = TokenAfter(parsePtr->tokenPtr); + if (tokenPtr->type != TCL_TOKEN_SIMPLE_WORD || tokenPtr[1].size != 4 + || strncmp(tokenPtr[1].start, "-all", 4)) { + return TCL_ERROR; + } + + /* + * Get the pattern into patternObj, checking for "--" in the process. + */ + + Tcl_DStringInit(&pattern); + tokenPtr = TokenAfter(tokenPtr); + patternObj = Tcl_NewObj(); + if (!TclWordKnownAtCompileTime(tokenPtr, patternObj)) { + goto done; + } + if (Tcl_GetString(patternObj)[0] == '-') { + if (strcmp(Tcl_GetString(patternObj), "--") != 0 + || parsePtr->numWords == 5) { + goto done; + } + tokenPtr = TokenAfter(tokenPtr); + Tcl_DecrRefCount(patternObj); + patternObj = Tcl_NewObj(); + if (!TclWordKnownAtCompileTime(tokenPtr, patternObj)) { + goto done; + } + } else if (parsePtr->numWords == 6) { + goto done; + } + + /* + * Identify the code which produces the string to apply the substitution + * to (stringTokenPtr), and the replacement string (into replacementObj). + */ + + stringTokenPtr = TokenAfter(tokenPtr); + tokenPtr = TokenAfter(stringTokenPtr); + replacementObj = Tcl_NewObj(); + if (!TclWordKnownAtCompileTime(tokenPtr, replacementObj)) { + goto done; + } + + /* + * Next, higher-level checks. Is the RE a very simple glob? Is the + * replacement "simple"? + */ + + bytes = Tcl_GetStringFromObj(patternObj, &len); + if (TclReToGlob(NULL, bytes, len, &pattern, &exact) != TCL_OK || exact) { + goto done; + } + bytes = Tcl_DStringValue(&pattern); + if (*bytes++ != '*') { + goto done; + } + while (1) { + switch (*bytes) { + case '*': + if (bytes[1] == '\0') { + /* + * OK, we've proved there are no metacharacters except for the + * '*' at each end. + */ + + len = Tcl_DStringLength(&pattern) - 2; + if (len > 0) { + goto isSimpleGlob; + } + + /* + * The pattern is "**"! I believe that should be impossible, + * but we definitely can't handle that at all. + */ + } + case '\0': case '?': case '[': case '\\': + goto done; + } + bytes++; + } + isSimpleGlob: + for (bytes = Tcl_GetString(replacementObj); *bytes; bytes++) { + switch (*bytes) { + case '\\': case '&': + goto done; + } + } + + /* + * Proved the simplicity constraints! Time to issue the code. + */ + + result = TCL_OK; + bytes = Tcl_DStringValue(&pattern) + 1; + PushLiteral(envPtr, bytes, len); + bytes = Tcl_GetStringFromObj(replacementObj, &len); + PushLiteral(envPtr, bytes, len); + CompileWord(envPtr, stringTokenPtr, interp, parsePtr->numWords-2); + TclEmitOpcode( INST_STR_MAP, envPtr); + + done: + Tcl_DStringFree(&pattern); + if (patternObj) { + Tcl_DecrRefCount(patternObj); + } + if (replacementObj) { + Tcl_DecrRefCount(replacementObj); + } + return result; +} + +/* + *---------------------------------------------------------------------- + * * TclCompileReturnCmd -- * * Procedure called to compile the "return" command. diff --git a/generic/tclInt.h b/generic/tclInt.h index 3aaed4c..27da005 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3616,6 +3616,9 @@ MODULE_SCOPE int TclCompileObjectSelfCmd(Tcl_Interp *interp, MODULE_SCOPE int TclCompileRegexpCmd(Tcl_Interp *interp, Tcl_Parse *parsePtr, Command *cmdPtr, struct CompileEnv *envPtr); +MODULE_SCOPE int TclCompileRegsubCmd(Tcl_Interp *interp, + Tcl_Parse *parsePtr, Command *cmdPtr, + struct CompileEnv *envPtr); MODULE_SCOPE int TclCompileReturnCmd(Tcl_Interp *interp, Tcl_Parse *parsePtr, Command *cmdPtr, struct CompileEnv *envPtr); -- cgit v0.12