summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2007-11-13 22:44:00 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2007-11-13 22:44:00 (GMT)
commit4b4b968689d3029079c5298593d1cb210189c041 (patch)
treedefddd8310487f45fbe1bccd5143d4ce87a04897
parent6534b24e483a1caa04b6194cf5d67b6e670e57ec (diff)
downloadtcl-4b4b968689d3029079c5298593d1cb210189c041.zip
tcl-4b4b968689d3029079c5298593d1cb210189c041.tar.gz
tcl-4b4b968689d3029079c5298593d1cb210189c041.tar.bz2
Extend [switch] compiler to handle regular expressions as long as things are not
too complex. Fix [regexp] compiler so that non-trivial literal regexps get fed to INST_REGEXP.
-rw-r--r--ChangeLog5
-rw-r--r--generic/tclCompCmds.c186
2 files changed, 120 insertions, 71 deletions
diff --git a/ChangeLog b/ChangeLog
index ba8d404..4baa164 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2007-11-13 Donal K. Fellows <dkf@users.sf.net>
+ * generic/tclCompCmds.c (TclCompileSwitchCmd, TclCompileRegexpCmd):
+ Extend [switch] compiler to handle regular expressions as long as
+ things are not too complex. Fix [regexp] compiler so that non-trivial
+ literal regexps get fed to INST_REGEXP.
+
* doc/mathop.n: Clarify definitions of some operations.
2007-11-13 Miguel Sofer <msofer@users.sf.net>
diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c
index 6179190..08b1370 100644
--- a/generic/tclCompCmds.c
+++ b/generic/tclCompCmds.c
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCompCmds.c,v 1.123 2007/11/12 02:07:19 hobbs Exp $
+ * RCS: @(#) $Id: tclCompCmds.c,v 1.124 2007/11/13 22:44:01 dkf Exp $
*/
#include "tclInt.h"
@@ -61,7 +61,6 @@
TclCompileCmdWord((interp), (tokenPtr)+1, (tokenPtr)->numComponents, \
(envPtr))
-
/*
* Convenience macro for use when compiling tokens to be pushed. The ANSI C
* "prototype" for this macro is:
@@ -916,8 +915,7 @@ TclCompileDictCmd(
Tcl_Token **keyTokenPtrs, *dictVarTokenPtr, *bodyTokenPtr;
DictUpdateInfo *duiPtr;
JumpFixup jumpFixup;
-
-
+
/*
* Parse the command. Expect the following:
* dict update <lit(eral)> <any> <lit> ?<any> <lit> ...? <lit>
@@ -995,9 +993,9 @@ TclCompileDictCmd(
/*
* Normal termination code: the stack has the key list below the
* result of the body evaluation: swap them and finish the update
- * code.
+ * code.
*/
-
+
TclEmitOpcode( INST_END_CATCH, envPtr);
TclEmitInstInt4( INST_REVERSE, 2, envPtr);
TclEmitInstInt4( INST_DICT_UPDATE_END, dictIndex, envPtr);
@@ -1006,7 +1004,7 @@ TclCompileDictCmd(
/*
* Jump around the exceptional termination code
*/
-
+
TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, &jumpFixup);
/*
@@ -1014,7 +1012,7 @@ TclCompileDictCmd(
* options in the stack, bring up the key list, finish the update
* code, and finally return with the catched return data
*/
-
+
ExceptionRangeTarget(envPtr, range, catchOffset);
TclEmitOpcode( INST_PUSH_RESULT, envPtr);
TclEmitOpcode( INST_PUSH_RETURN_OPTIONS, envPtr);
@@ -1025,7 +1023,6 @@ TclCompileDictCmd(
TclEmitInt4( infoIndex, envPtr);
TclEmitOpcode( INST_RETURN_STK, envPtr);
-
if (TclFixupForwardJumpToHere(envPtr, &jumpFixup, 127)) {
Tcl_Panic("TclCompileDictCmd(update): bad jump distance %d",
CurrentOffset(envPtr) - jumpFixup.codeOffset);
@@ -1303,7 +1300,6 @@ TclCompileForCmd(
envPtr->currStackDepth = savedStackDepth + 1;
TclEmitOpcode(INST_POP, envPtr);
-
/*
* Compile the "next" subcommand.
*/
@@ -1890,7 +1886,6 @@ TclCompileIfCmd(
tokenPtr = TokenAfter(tokenPtr);
}
-
TclInitJumpFixupArray(&jumpFalseFixupArray);
TclInitJumpFixupArray(&jumpEndFixupArray);
code = TCL_OK;
@@ -1929,7 +1924,6 @@ TclCompileIfCmd(
envPtr->currStackDepth = savedStackDepth;
testTokenPtr = tokenPtr;
-
if (realCond) {
/*
* Find out if the condition is a constant.
@@ -1964,7 +1958,6 @@ TclCompileIfCmd(
code = TCL_OK;
}
-
/*
* Skip over the optional "then" before the then clause.
*/
@@ -2976,12 +2969,13 @@ TclCompileRegexpCmd(
if (TclReToGlob(NULL, varTokenPtr[1].start, len, &ds, &exact)
!= TCL_OK) {
- return TCL_ERROR;
+ simple = 0;
+ } else {
+ PushLiteral(envPtr, Tcl_DStringValue(&ds),Tcl_DStringLength(&ds));
}
-
- PushLiteral(envPtr, Tcl_DStringValue(&ds), Tcl_DStringLength(&ds));
Tcl_DStringFree(&ds);
- } else {
+ }
+ if (!simple) {
CompileWord(envPtr, varTokenPtr, interp, parsePtr->numWords-2);
}
@@ -3175,7 +3169,7 @@ CompileReturnInternal(
unsigned char op,
int code,
int level,
- Tcl_Obj *returnOpts)
+ Tcl_Obj *returnOpts)
{
TclEmitPush(TclAddLiteralObj(envPtr, returnOpts, NULL), envPtr);
TclEmitInstInt4(op, code, envPtr);
@@ -3543,7 +3537,7 @@ TclCompileSwitchCmd(
int numWords; /* Number of words in command. */
Tcl_Token *valueTokenPtr; /* Token for the value to switch on. */
- enum {Switch_Exact, Switch_Glob} mode;
+ enum {Switch_Exact, Switch_Glob, Switch_Regexp} mode;
/* What kind of switch are we doing? */
Tcl_Token *bodyTokenArray; /* Array of real pattern list items. */
@@ -3571,12 +3565,14 @@ TclCompileSwitchCmd(
/*
* Only handle the following versions:
- * switch -- word {pattern body ...}
- * switch -exact -- word {pattern body ...}
- * switch -glob -- word {pattern body ...}
- * switch -- word simpleWordPattern simpleWordBody ...
- * switch -exact -- word simpleWordPattern simpleWordBody ...
- * switch -glob -- word simpleWordPattern simpleWordBody ...
+ * switch -- word {pattern body ...}
+ * switch -exact -- word {pattern body ...}
+ * switch -glob -- word {pattern body ...}
+ * switch -regexp -- word {pattern body ...}
+ * switch -- word simpleWordPattern simpleWordBody ...
+ * switch -exact -- word simpleWordPattern simpleWordBody ...
+ * switch -glob -- word simpleWordPattern simpleWordBody ...
+ * switch -regexp -- word simpleWordPattern simpleWordBody ...
* When the mode is -glob, can also handle a -nocase flag.
*
* First off, we don't care how the command's word was generated; we're
@@ -3628,6 +3624,14 @@ TclCompileSwitchCmd(
foundMode = 1;
valueIndex++;
continue;
+ } else if ((size <= 7) && !memcmp(chrs, "-regexp", size)) {
+ if (foundMode) {
+ return TCL_ERROR;
+ }
+ mode = Switch_Regexp;
+ foundMode = 1;
+ valueIndex++;
+ continue;
} else if ((size <= 7) && !memcmp(chrs, "-nocase", size)) {
noCase = 1;
valueIndex++;
@@ -3651,7 +3655,7 @@ TclCompileSwitchCmd(
}
tokenPtr = TokenAfter(tokenPtr);
numWords--;
- if (noCase && (mode == Switch_Exact)) {
+ if (noCase && (mode != Switch_Exact)) {
/*
* Can't compile this case; no opcode for case-insensitive equality!
*/
@@ -4063,19 +4067,68 @@ TclCompileSwitchCmd(
if (i!=numWords-2 || bodyToken[numWords-2]->size != 7 ||
memcmp(bodyToken[numWords-2]->start, "default", 7)) {
/*
- * Generate the test for the arm. This code is slightly
- * inefficient, but much simpler than the first version.
+ * Generate the test for the arm.
*/
- TclCompileTokens(interp, bodyToken[i], 1, envPtr);
- TclEmitInstInt4(INST_OVER, 1, envPtr);
switch (mode) {
case Switch_Exact:
+ TclEmitOpcode(INST_DUP, envPtr);
+ TclCompileTokens(interp, bodyToken[i], 1, envPtr);
TclEmitOpcode(INST_STR_EQ, envPtr);
break;
case Switch_Glob:
+ TclCompileTokens(interp, bodyToken[i], 1, envPtr);
+ TclEmitInstInt4(INST_OVER, 1, envPtr);
TclEmitInstInt1(INST_STR_MATCH, noCase, envPtr);
break;
+ case Switch_Regexp: {
+ int simple = 0, exact = 0;
+
+ if (bodyToken[i]->type == TCL_TOKEN_TEXT) {
+ Tcl_DString ds;
+
+ simple = 1;
+ if (bodyToken[i]->size == 0) {
+ /*
+ * The semantics of regexps are that they always match
+ * when the RE == "".
+ */
+
+ PushLiteral(envPtr, "1", 1);
+ break;
+ }
+
+ /*
+ * Attempt to convert pattern to glob. If successful, push
+ * the converted pattern.
+ */
+
+ Tcl_DStringInit(&ds);
+ if (TclReToGlob(NULL, bodyToken[i]->start,
+ bodyToken[i]->size, &ds, &exact) != TCL_OK) {
+ TclCompileTokens(interp, bodyToken[i], 1, envPtr);
+ simple = 0;
+ } else {
+ PushLiteral(envPtr, Tcl_DStringValue(&ds),
+ Tcl_DStringLength(&ds));
+ }
+ Tcl_DStringFree(&ds);
+ } else {
+ TclCompileTokens(interp, bodyToken[i], 1, envPtr);
+ }
+
+ TclEmitInstInt4(INST_OVER, 1, envPtr);
+ if (simple) {
+ if (exact && !noCase) {
+ TclEmitOpcode(INST_STR_EQ, envPtr);
+ } else {
+ TclEmitInstInt1(INST_STR_MATCH, noCase, envPtr);
+ }
+ } else {
+ TclEmitInstInt1(INST_REGEXP, noCase, envPtr);
+ }
+ break;
+ }
default:
Tcl_Panic("unknown switch mode: %d", mode);
}
@@ -4449,7 +4502,6 @@ TclCompileWhileCmd(
}
}
-
/*
* Set the loop's body, continue and break offsets.
*/
@@ -5233,7 +5285,6 @@ TclCompileDivOpCmd(
}
return TCL_OK;
}
-
/*
*----------------------------------------------------------------------
@@ -5246,7 +5297,7 @@ TclCompileDivOpCmd(
*
* Results:
* Returns the variable's index in the table of compiled locals if the
- * tail is known at compile time, or -1 otherwise.
+ * tail is known at compile time, or -1 otherwise.
*
* Side effects:
* None.
@@ -5258,14 +5309,14 @@ static int
IndexTailVarIfKnown(
Tcl_Interp *interp,
Tcl_Token *varTokenPtr, /* Token representing the variable name */
- CompileEnv *envPtr) /* Holds resulting instructions. */
+ CompileEnv *envPtr) /* Holds resulting instructions. */
{
Tcl_Obj *tailPtr;
const char *tailName, *p;
int len, n = varTokenPtr->numComponents;
Tcl_Token *lastTokenPtr;
int full, localIndex;
-
+
/*
* Determine if the tail is (a) known at compile time, and (b) not an
* array element. Should any of these fail, return an error so that
@@ -5285,13 +5336,13 @@ IndexTailVarIfKnown(
lastTokenPtr = varTokenPtr;
} else {
full = 0;
- lastTokenPtr = varTokenPtr + n;
+ lastTokenPtr = varTokenPtr + n;
if (!TclWordKnownAtCompileTime(lastTokenPtr, tailPtr)) {
Tcl_DecrRefCount(tailPtr);
return -1;
}
}
-
+
tailName = TclGetStringFromObj(tailPtr, &len);
if (len) {
@@ -5299,7 +5350,7 @@ IndexTailVarIfKnown(
/*
* Possible array: bail out
*/
-
+
Tcl_DecrRefCount(tailPtr);
return -1;
}
@@ -5307,7 +5358,7 @@ IndexTailVarIfKnown(
/*
* Get the tail: immediately after the last '::'
*/
-
+
for(p = tailName + len -1; p > tailName; p--) {
if ((*p == ':') && (*(p-1) == ':')) {
p++;
@@ -5331,7 +5382,6 @@ IndexTailVarIfKnown(
Tcl_DecrRefCount(tailPtr);
return localIndex;
}
-
/*
*----------------------------------------------------------------------
@@ -5359,22 +5409,21 @@ TclCompileUpvarCmd(
CompileEnv *envPtr) /* Holds resulting instructions. */
{
Tcl_Token *tokenPtr, *otherTokenPtr, *localTokenPtr;
- int simpleVarName, isScalar, localIndex, numWords, i;
+ int simpleVarName, isScalar, localIndex, numWords, i;
DefineLineInformation; /* TIP #280 */
Tcl_Obj *objPtr = Tcl_NewObj();
-
+
if (envPtr->procPtr == NULL) {
Tcl_DecrRefCount(objPtr);
return TCL_ERROR;
}
-
+
numWords = parsePtr->numWords;
if (numWords < 3) {
Tcl_DecrRefCount(objPtr);
return TCL_ERROR;
}
-
/*
* Push the frame index if it is known at compile time
*/
@@ -5388,11 +5437,11 @@ TclCompileUpvarCmd(
* Attempt to convert to a level reference. Note that TclObjGetFrame
* only changes the obj type when a conversion was successful.
*/
-
+
TclObjGetFrame(interp, objPtr, &framePtr);
newTypePtr = objPtr->typePtr;
Tcl_DecrRefCount(objPtr);
-
+
if (newTypePtr != typePtr) {
if(numWords%2) {
return TCL_ERROR;
@@ -5412,7 +5461,7 @@ TclCompileUpvarCmd(
Tcl_DecrRefCount(objPtr);
return TCL_ERROR;
}
-
+
/*
* Loop over the (otherVar, thisVar) pairs. If any of the thisVar is not a
* local variable, return an error so that the non-compiled command will
@@ -5432,7 +5481,7 @@ TclCompileUpvarCmd(
}
TclEmitInstInt4(INST_UPVAR, localIndex, envPtr);
}
-
+
/*
* Pop the frame index, and set the result to empty
*/
@@ -5441,7 +5490,6 @@ TclCompileUpvarCmd(
PushLiteral(envPtr, "", 0);
return TCL_OK;
}
-
/*
*----------------------------------------------------------------------
@@ -5470,13 +5518,13 @@ TclCompileNamespaceCmd(
CompileEnv *envPtr) /* Holds resulting instructions. */
{
Tcl_Token *tokenPtr, *otherTokenPtr, *localTokenPtr;
- int simpleVarName, isScalar, localIndex, numWords, i;
+ int simpleVarName, isScalar, localIndex, numWords, i;
DefineLineInformation; /* TIP #280 */
-
+
if (envPtr->procPtr == NULL) {
return TCL_ERROR;
}
-
+
/*
* Only compile [namespace upvar ...]: needs an odd number of args, >=5
*/
@@ -5486,7 +5534,6 @@ TclCompileNamespaceCmd(
return TCL_ERROR;
}
-
/*
* Check if the second argument is "upvar"
*/
@@ -5525,7 +5572,7 @@ TclCompileNamespaceCmd(
}
TclEmitInstInt4(INST_NSUPVAR, localIndex, envPtr);
}
-
+
/*
* Pop the namespace, and set the result to empty
*/
@@ -5534,7 +5581,6 @@ TclCompileNamespaceCmd(
PushLiteral(envPtr, "", 0);
return TCL_OK;
}
-
/*
*----------------------------------------------------------------------
@@ -5548,7 +5594,7 @@ TclCompileNamespaceCmd(
* evaluation to runtime.
*
* Side effects:
- * Instructions are added to envPtr to execute the "global" command at
+ * Instructions are added to envPtr to execute the "global" command at
* runtime.
*
*----------------------------------------------------------------------
@@ -5562,9 +5608,9 @@ TclCompileGlobalCmd(
CompileEnv *envPtr) /* Holds resulting instructions. */
{
Tcl_Token *varTokenPtr;
- int localIndex, numWords, i;
+ int localIndex, numWords, i;
DefineLineInformation; /* TIP #280 */
-
+
numWords = parsePtr->numWords;
if (numWords < 2) {
return TCL_ERROR;
@@ -5577,7 +5623,7 @@ TclCompileGlobalCmd(
if (envPtr->procPtr == NULL) {
return TCL_ERROR;
}
-
+
/*
* Push the namespace
*/
@@ -5599,7 +5645,7 @@ TclCompileGlobalCmd(
CompileWord(envPtr, varTokenPtr, interp, 1);
TclEmitInstInt4(INST_NSUPVAR, localIndex, envPtr);
}
-
+
/*
* Pop the namespace, and set the result to empty
*/
@@ -5608,7 +5654,6 @@ TclCompileGlobalCmd(
PushLiteral(envPtr, "", 0);
return TCL_OK;
}
-
/*
*----------------------------------------------------------------------
@@ -5622,7 +5667,7 @@ TclCompileGlobalCmd(
* evaluation to runtime.
*
* Side effects:
- * Instructions are added to envPtr to execute the "variable" command at
+ * Instructions are added to envPtr to execute the "variable" command at
* runtime.
*
*----------------------------------------------------------------------
@@ -5636,9 +5681,9 @@ TclCompileVariableCmd(
CompileEnv *envPtr) /* Holds resulting instructions. */
{
Tcl_Token *varTokenPtr, *valueTokenPtr;
- int localIndex, numWords, i;
+ int localIndex, numWords, i;
DefineLineInformation; /* TIP #280 */
-
+
numWords = parsePtr->numWords;
if (numWords < 2) {
return TCL_ERROR;
@@ -5647,13 +5692,13 @@ TclCompileVariableCmd(
/*
* Bail out if not compiling a proc body
*/
-
+
if (envPtr->procPtr == NULL) {
return TCL_ERROR;
}
-
+
/*
- * Loop over the (var, value) pairs.
+ * Loop over the (var, value) pairs.
*/
valueTokenPtr = parsePtr->tokenPtr;
@@ -5666,10 +5711,10 @@ TclCompileVariableCmd(
if(localIndex < 0) {
return TCL_ERROR;
}
-
+
CompileWord(envPtr, varTokenPtr, interp, 1);
TclEmitInstInt4(INST_VARIABLE, localIndex, envPtr);
-
+
if (i != numWords) {
/*
* A value has been given: set the variable, pop the value
@@ -5680,7 +5725,7 @@ TclCompileVariableCmd(
TclEmitOpcode(INST_POP, envPtr);
}
}
-
+
/*
* Set the result to empty
*/
@@ -5688,7 +5733,6 @@ TclCompileVariableCmd(
PushLiteral(envPtr, "", 0);
return TCL_OK;
}
-
/*
* Local Variables: