From 4b4b968689d3029079c5298593d1cb210189c041 Mon Sep 17 00:00:00 2001 From: dkf Date: Tue, 13 Nov 2007 22:44:00 +0000 Subject: Extend [switch] compiler to handle regular expressions as long as things are not too complex. Fix [regexp] compiler so that non-trivial literal regexps get fed to INST_REGEXP. --- ChangeLog | 5 ++ generic/tclCompCmds.c | 186 +++++++++++++++++++++++++++++++------------------- 2 files changed, 120 insertions(+), 71 deletions(-) diff --git a/ChangeLog b/ChangeLog index ba8d404..4baa164 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2007-11-13 Donal K. Fellows + * generic/tclCompCmds.c (TclCompileSwitchCmd, TclCompileRegexpCmd): + Extend [switch] compiler to handle regular expressions as long as + things are not too complex. Fix [regexp] compiler so that non-trivial + literal regexps get fed to INST_REGEXP. + * doc/mathop.n: Clarify definitions of some operations. 2007-11-13 Miguel Sofer diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c index 6179190..08b1370 100644 --- a/generic/tclCompCmds.c +++ b/generic/tclCompCmds.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCompCmds.c,v 1.123 2007/11/12 02:07:19 hobbs Exp $ + * RCS: @(#) $Id: tclCompCmds.c,v 1.124 2007/11/13 22:44:01 dkf Exp $ */ #include "tclInt.h" @@ -61,7 +61,6 @@ TclCompileCmdWord((interp), (tokenPtr)+1, (tokenPtr)->numComponents, \ (envPtr)) - /* * Convenience macro for use when compiling tokens to be pushed. The ANSI C * "prototype" for this macro is: @@ -916,8 +915,7 @@ TclCompileDictCmd( Tcl_Token **keyTokenPtrs, *dictVarTokenPtr, *bodyTokenPtr; DictUpdateInfo *duiPtr; JumpFixup jumpFixup; - - + /* * Parse the command. Expect the following: * dict update ? ...? @@ -995,9 +993,9 @@ TclCompileDictCmd( /* * Normal termination code: the stack has the key list below the * result of the body evaluation: swap them and finish the update - * code. + * code. */ - + TclEmitOpcode( INST_END_CATCH, envPtr); TclEmitInstInt4( INST_REVERSE, 2, envPtr); TclEmitInstInt4( INST_DICT_UPDATE_END, dictIndex, envPtr); @@ -1006,7 +1004,7 @@ TclCompileDictCmd( /* * Jump around the exceptional termination code */ - + TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, &jumpFixup); /* @@ -1014,7 +1012,7 @@ TclCompileDictCmd( * options in the stack, bring up the key list, finish the update * code, and finally return with the catched return data */ - + ExceptionRangeTarget(envPtr, range, catchOffset); TclEmitOpcode( INST_PUSH_RESULT, envPtr); TclEmitOpcode( INST_PUSH_RETURN_OPTIONS, envPtr); @@ -1025,7 +1023,6 @@ TclCompileDictCmd( TclEmitInt4( infoIndex, envPtr); TclEmitOpcode( INST_RETURN_STK, envPtr); - if (TclFixupForwardJumpToHere(envPtr, &jumpFixup, 127)) { Tcl_Panic("TclCompileDictCmd(update): bad jump distance %d", CurrentOffset(envPtr) - jumpFixup.codeOffset); @@ -1303,7 +1300,6 @@ TclCompileForCmd( envPtr->currStackDepth = savedStackDepth + 1; TclEmitOpcode(INST_POP, envPtr); - /* * Compile the "next" subcommand. */ @@ -1890,7 +1886,6 @@ TclCompileIfCmd( tokenPtr = TokenAfter(tokenPtr); } - TclInitJumpFixupArray(&jumpFalseFixupArray); TclInitJumpFixupArray(&jumpEndFixupArray); code = TCL_OK; @@ -1929,7 +1924,6 @@ TclCompileIfCmd( envPtr->currStackDepth = savedStackDepth; testTokenPtr = tokenPtr; - if (realCond) { /* * Find out if the condition is a constant. @@ -1964,7 +1958,6 @@ TclCompileIfCmd( code = TCL_OK; } - /* * Skip over the optional "then" before the then clause. */ @@ -2976,12 +2969,13 @@ TclCompileRegexpCmd( if (TclReToGlob(NULL, varTokenPtr[1].start, len, &ds, &exact) != TCL_OK) { - return TCL_ERROR; + simple = 0; + } else { + PushLiteral(envPtr, Tcl_DStringValue(&ds),Tcl_DStringLength(&ds)); } - - PushLiteral(envPtr, Tcl_DStringValue(&ds), Tcl_DStringLength(&ds)); Tcl_DStringFree(&ds); - } else { + } + if (!simple) { CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-2); } @@ -3175,7 +3169,7 @@ CompileReturnInternal( unsigned char op, int code, int level, - Tcl_Obj *returnOpts) + Tcl_Obj *returnOpts) { TclEmitPush(TclAddLiteralObj(envPtr, returnOpts, NULL), envPtr); TclEmitInstInt4(op, code, envPtr); @@ -3543,7 +3537,7 @@ TclCompileSwitchCmd( int numWords; /* Number of words in command. */ Tcl_Token *valueTokenPtr; /* Token for the value to switch on. */ - enum {Switch_Exact, Switch_Glob} mode; + enum {Switch_Exact, Switch_Glob, Switch_Regexp} mode; /* What kind of switch are we doing? */ Tcl_Token *bodyTokenArray; /* Array of real pattern list items. */ @@ -3571,12 +3565,14 @@ TclCompileSwitchCmd( /* * Only handle the following versions: - * switch -- word {pattern body ...} - * switch -exact -- word {pattern body ...} - * switch -glob -- word {pattern body ...} - * switch -- word simpleWordPattern simpleWordBody ... - * switch -exact -- word simpleWordPattern simpleWordBody ... - * switch -glob -- word simpleWordPattern simpleWordBody ... + * switch -- word {pattern body ...} + * switch -exact -- word {pattern body ...} + * switch -glob -- word {pattern body ...} + * switch -regexp -- word {pattern body ...} + * switch -- word simpleWordPattern simpleWordBody ... + * switch -exact -- word simpleWordPattern simpleWordBody ... + * switch -glob -- word simpleWordPattern simpleWordBody ... + * switch -regexp -- word simpleWordPattern simpleWordBody ... * When the mode is -glob, can also handle a -nocase flag. * * First off, we don't care how the command's word was generated; we're @@ -3628,6 +3624,14 @@ TclCompileSwitchCmd( foundMode = 1; valueIndex++; continue; + } else if ((size <= 7) && !memcmp(chrs, "-regexp", size)) { + if (foundMode) { + return TCL_ERROR; + } + mode = Switch_Regexp; + foundMode = 1; + valueIndex++; + continue; } else if ((size <= 7) && !memcmp(chrs, "-nocase", size)) { noCase = 1; valueIndex++; @@ -3651,7 +3655,7 @@ TclCompileSwitchCmd( } tokenPtr = TokenAfter(tokenPtr); numWords--; - if (noCase && (mode == Switch_Exact)) { + if (noCase && (mode != Switch_Exact)) { /* * Can't compile this case; no opcode for case-insensitive equality! */ @@ -4063,19 +4067,68 @@ TclCompileSwitchCmd( if (i!=numWords-2 || bodyToken[numWords-2]->size != 7 || memcmp(bodyToken[numWords-2]->start, "default", 7)) { /* - * Generate the test for the arm. This code is slightly - * inefficient, but much simpler than the first version. + * Generate the test for the arm. */ - TclCompileTokens(interp, bodyToken[i], 1, envPtr); - TclEmitInstInt4(INST_OVER, 1, envPtr); switch (mode) { case Switch_Exact: + TclEmitOpcode(INST_DUP, envPtr); + TclCompileTokens(interp, bodyToken[i], 1, envPtr); TclEmitOpcode(INST_STR_EQ, envPtr); break; case Switch_Glob: + TclCompileTokens(interp, bodyToken[i], 1, envPtr); + TclEmitInstInt4(INST_OVER, 1, envPtr); TclEmitInstInt1(INST_STR_MATCH, noCase, envPtr); break; + case Switch_Regexp: { + int simple = 0, exact = 0; + + if (bodyToken[i]->type == TCL_TOKEN_TEXT) { + Tcl_DString ds; + + simple = 1; + if (bodyToken[i]->size == 0) { + /* + * The semantics of regexps are that they always match + * when the RE == "". + */ + + PushLiteral(envPtr, "1", 1); + break; + } + + /* + * Attempt to convert pattern to glob. If successful, push + * the converted pattern. + */ + + Tcl_DStringInit(&ds); + if (TclReToGlob(NULL, bodyToken[i]->start, + bodyToken[i]->size, &ds, &exact) != TCL_OK) { + TclCompileTokens(interp, bodyToken[i], 1, envPtr); + simple = 0; + } else { + PushLiteral(envPtr, Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds)); + } + Tcl_DStringFree(&ds); + } else { + TclCompileTokens(interp, bodyToken[i], 1, envPtr); + } + + TclEmitInstInt4(INST_OVER, 1, envPtr); + if (simple) { + if (exact && !noCase) { + TclEmitOpcode(INST_STR_EQ, envPtr); + } else { + TclEmitInstInt1(INST_STR_MATCH, noCase, envPtr); + } + } else { + TclEmitInstInt1(INST_REGEXP, noCase, envPtr); + } + break; + } default: Tcl_Panic("unknown switch mode: %d", mode); } @@ -4449,7 +4502,6 @@ TclCompileWhileCmd( } } - /* * Set the loop's body, continue and break offsets. */ @@ -5233,7 +5285,6 @@ TclCompileDivOpCmd( } return TCL_OK; } - /* *---------------------------------------------------------------------- @@ -5246,7 +5297,7 @@ TclCompileDivOpCmd( * * Results: * Returns the variable's index in the table of compiled locals if the - * tail is known at compile time, or -1 otherwise. + * tail is known at compile time, or -1 otherwise. * * Side effects: * None. @@ -5258,14 +5309,14 @@ static int IndexTailVarIfKnown( Tcl_Interp *interp, Tcl_Token *varTokenPtr, /* Token representing the variable name */ - CompileEnv *envPtr) /* Holds resulting instructions. */ + CompileEnv *envPtr) /* Holds resulting instructions. */ { Tcl_Obj *tailPtr; const char *tailName, *p; int len, n = varTokenPtr->numComponents; Tcl_Token *lastTokenPtr; int full, localIndex; - + /* * Determine if the tail is (a) known at compile time, and (b) not an * array element. Should any of these fail, return an error so that @@ -5285,13 +5336,13 @@ IndexTailVarIfKnown( lastTokenPtr = varTokenPtr; } else { full = 0; - lastTokenPtr = varTokenPtr + n; + lastTokenPtr = varTokenPtr + n; if (!TclWordKnownAtCompileTime(lastTokenPtr, tailPtr)) { Tcl_DecrRefCount(tailPtr); return -1; } } - + tailName = TclGetStringFromObj(tailPtr, &len); if (len) { @@ -5299,7 +5350,7 @@ IndexTailVarIfKnown( /* * Possible array: bail out */ - + Tcl_DecrRefCount(tailPtr); return -1; } @@ -5307,7 +5358,7 @@ IndexTailVarIfKnown( /* * Get the tail: immediately after the last '::' */ - + for(p = tailName + len -1; p > tailName; p--) { if ((*p == ':') && (*(p-1) == ':')) { p++; @@ -5331,7 +5382,6 @@ IndexTailVarIfKnown( Tcl_DecrRefCount(tailPtr); return localIndex; } - /* *---------------------------------------------------------------------- @@ -5359,22 +5409,21 @@ TclCompileUpvarCmd( CompileEnv *envPtr) /* Holds resulting instructions. */ { Tcl_Token *tokenPtr, *otherTokenPtr, *localTokenPtr; - int simpleVarName, isScalar, localIndex, numWords, i; + int simpleVarName, isScalar, localIndex, numWords, i; DefineLineInformation; /* TIP #280 */ Tcl_Obj *objPtr = Tcl_NewObj(); - + if (envPtr->procPtr == NULL) { Tcl_DecrRefCount(objPtr); return TCL_ERROR; } - + numWords = parsePtr->numWords; if (numWords < 3) { Tcl_DecrRefCount(objPtr); return TCL_ERROR; } - /* * Push the frame index if it is known at compile time */ @@ -5388,11 +5437,11 @@ TclCompileUpvarCmd( * Attempt to convert to a level reference. Note that TclObjGetFrame * only changes the obj type when a conversion was successful. */ - + TclObjGetFrame(interp, objPtr, &framePtr); newTypePtr = objPtr->typePtr; Tcl_DecrRefCount(objPtr); - + if (newTypePtr != typePtr) { if(numWords%2) { return TCL_ERROR; @@ -5412,7 +5461,7 @@ TclCompileUpvarCmd( Tcl_DecrRefCount(objPtr); return TCL_ERROR; } - + /* * Loop over the (otherVar, thisVar) pairs. If any of the thisVar is not a * local variable, return an error so that the non-compiled command will @@ -5432,7 +5481,7 @@ TclCompileUpvarCmd( } TclEmitInstInt4(INST_UPVAR, localIndex, envPtr); } - + /* * Pop the frame index, and set the result to empty */ @@ -5441,7 +5490,6 @@ TclCompileUpvarCmd( PushLiteral(envPtr, "", 0); return TCL_OK; } - /* *---------------------------------------------------------------------- @@ -5470,13 +5518,13 @@ TclCompileNamespaceCmd( CompileEnv *envPtr) /* Holds resulting instructions. */ { Tcl_Token *tokenPtr, *otherTokenPtr, *localTokenPtr; - int simpleVarName, isScalar, localIndex, numWords, i; + int simpleVarName, isScalar, localIndex, numWords, i; DefineLineInformation; /* TIP #280 */ - + if (envPtr->procPtr == NULL) { return TCL_ERROR; } - + /* * Only compile [namespace upvar ...]: needs an odd number of args, >=5 */ @@ -5486,7 +5534,6 @@ TclCompileNamespaceCmd( return TCL_ERROR; } - /* * Check if the second argument is "upvar" */ @@ -5525,7 +5572,7 @@ TclCompileNamespaceCmd( } TclEmitInstInt4(INST_NSUPVAR, localIndex, envPtr); } - + /* * Pop the namespace, and set the result to empty */ @@ -5534,7 +5581,6 @@ TclCompileNamespaceCmd( PushLiteral(envPtr, "", 0); return TCL_OK; } - /* *---------------------------------------------------------------------- @@ -5548,7 +5594,7 @@ TclCompileNamespaceCmd( * evaluation to runtime. * * Side effects: - * Instructions are added to envPtr to execute the "global" command at + * Instructions are added to envPtr to execute the "global" command at * runtime. * *---------------------------------------------------------------------- @@ -5562,9 +5608,9 @@ TclCompileGlobalCmd( CompileEnv *envPtr) /* Holds resulting instructions. */ { Tcl_Token *varTokenPtr; - int localIndex, numWords, i; + int localIndex, numWords, i; DefineLineInformation; /* TIP #280 */ - + numWords = parsePtr->numWords; if (numWords < 2) { return TCL_ERROR; @@ -5577,7 +5623,7 @@ TclCompileGlobalCmd( if (envPtr->procPtr == NULL) { return TCL_ERROR; } - + /* * Push the namespace */ @@ -5599,7 +5645,7 @@ TclCompileGlobalCmd( CompileWord(envPtr, varTokenPtr, interp, 1); TclEmitInstInt4(INST_NSUPVAR, localIndex, envPtr); } - + /* * Pop the namespace, and set the result to empty */ @@ -5608,7 +5654,6 @@ TclCompileGlobalCmd( PushLiteral(envPtr, "", 0); return TCL_OK; } - /* *---------------------------------------------------------------------- @@ -5622,7 +5667,7 @@ TclCompileGlobalCmd( * evaluation to runtime. * * Side effects: - * Instructions are added to envPtr to execute the "variable" command at + * Instructions are added to envPtr to execute the "variable" command at * runtime. * *---------------------------------------------------------------------- @@ -5636,9 +5681,9 @@ TclCompileVariableCmd( CompileEnv *envPtr) /* Holds resulting instructions. */ { Tcl_Token *varTokenPtr, *valueTokenPtr; - int localIndex, numWords, i; + int localIndex, numWords, i; DefineLineInformation; /* TIP #280 */ - + numWords = parsePtr->numWords; if (numWords < 2) { return TCL_ERROR; @@ -5647,13 +5692,13 @@ TclCompileVariableCmd( /* * Bail out if not compiling a proc body */ - + if (envPtr->procPtr == NULL) { return TCL_ERROR; } - + /* - * Loop over the (var, value) pairs. + * Loop over the (var, value) pairs. */ valueTokenPtr = parsePtr->tokenPtr; @@ -5666,10 +5711,10 @@ TclCompileVariableCmd( if(localIndex < 0) { return TCL_ERROR; } - + CompileWord(envPtr, varTokenPtr, interp, 1); TclEmitInstInt4(INST_VARIABLE, localIndex, envPtr); - + if (i != numWords) { /* * A value has been given: set the variable, pop the value @@ -5680,7 +5725,7 @@ TclCompileVariableCmd( TclEmitOpcode(INST_POP, envPtr); } } - + /* * Set the result to empty */ @@ -5688,7 +5733,6 @@ TclCompileVariableCmd( PushLiteral(envPtr, "", 0); return TCL_OK; } - /* * Local Variables: -- cgit v0.12