From 6d14b815b2889457c7f12b3c881356aa5c61baa6 Mon Sep 17 00:00:00 2001 From: dkf Date: Fri, 2 Nov 2012 16:50:06 +0000 Subject: Work on compilation of [string is]. Hit some problem edge cases with differences in strictness of edge cases that will force a rethink ([string is boolean] is significantly more strict than Tcl_GetBooleanFromObj). --- generic/tclCmdMZ.c | 2 +- generic/tclCompCmdsSZ.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++ generic/tclInt.h | 3 + 3 files changed, 194 insertions(+), 1 deletion(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index de32fce..0526325 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -3306,7 +3306,7 @@ TclInitStringCmd( {"equal", StringEqualCmd, TclCompileStringEqualCmd, NULL, NULL, 0}, {"first", StringFirstCmd, TclCompileStringFirstCmd, NULL, NULL, 0}, {"index", StringIndexCmd, TclCompileStringIndexCmd, NULL, NULL, 0}, - {"is", StringIsCmd, NULL, NULL, NULL, 0}, + {"is", StringIsCmd, TclCompileStringIsCmd, NULL, NULL, 0}, {"last", StringLastCmd, NULL, NULL, NULL, 0}, {"length", StringLenCmd, TclCompileStringLenCmd, NULL, NULL, 0}, {"map", StringMapCmd, TclCompileStringMapCmd, NULL, NULL, 0}, diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 57cb992..b9309ec 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -451,6 +451,196 @@ TclCompileStringIndexCmd( /* *---------------------------------------------------------------------- * + * TclCompileStringIsCmd -- + * + * Procedure called to compile the simplest and most common form of the + * "string is" command. + * + * Results: + * Returns TCL_OK for a successful compile. Returns TCL_ERROR to defer + * evaluation to runtime. + * + * Side effects: + * Instructions are added to envPtr to execute the "string is" command at + * runtime. + * + *---------------------------------------------------------------------- + */ + +int +TclCompileStringIsCmd( + Tcl_Interp *interp, /* Used for error reporting. */ + Tcl_Parse *parsePtr, /* Points to a parse structure for the command + * created by Tcl_ParseCommand. */ + Command *cmdPtr, /* Points to defintion of command being + * compiled. */ + CompileEnv *envPtr) /* Holds resulting instructions. */ +{ + DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr); + int numWords = parsePtr->numWords; + enum IsType { + TypeBool, TypeBoolFalse, TypeBoolTrue, + TypeFloat, + TypeInteger, TypeNarrowInt, TypeWideInt, + TypeList /*, TypeDict */ + }; + enum IsType t; + JumpFixup jumpFixup; + int start, range; + int allowEmpty = 0; + + if (numWords < 2 || tokenPtr->type != TCL_TOKEN_SIMPLE_WORD) { + return TCL_ERROR; + } +#define GotLiteral(tokenPtr,word) \ + ((tokenPtr)[1].size > 1 && (tokenPtr)[1].start[0] == word[0] && \ + strncmp((tokenPtr)[1].start, (word), (tokenPtr)[1].size) == 0) + + if (GotLiteral(tokenPtr, "boolean")) { + t = TypeBool; + } else if (GotLiteral(tokenPtr, "double")) { + t = TypeFloat; + } else if (GotLiteral(tokenPtr, "entier")) { + t = TypeInteger; + } else if (GotLiteral(tokenPtr, "false")) { + t = TypeBoolFalse; + } else if (GotLiteral(tokenPtr, "integer")) { + t = TypeNarrowInt; + return TCL_ERROR; // Not yet implemented + } else if (GotLiteral(tokenPtr, "list")) { + t = TypeList; + } else if (GotLiteral(tokenPtr, "true")) { + t = TypeBoolTrue; + } else if (GotLiteral(tokenPtr, "wideinteger")) { + t = TypeWideInt; + return TCL_ERROR; // Not yet implemented + } else { + /* + * We don't handle character class checks in bytecode currently. + */ + + return TCL_ERROR; + } + if (numWords != 3 && numWords != 4) { + return TCL_ERROR; + } + tokenPtr = TokenAfter(tokenPtr); + if (numWords == 3) { + allowEmpty = (t != TypeList); + } else { + if (!GotLiteral(tokenPtr, "-strict")) { + return TCL_ERROR; + } + tokenPtr = TokenAfter(tokenPtr); + } +#undef GotLiteral + + /* + * Push the word to check. + */ + + CompileWord(envPtr, tokenPtr, interp, numWords-1); + + /* + * Next, do the type check. First, we push a catch range; most of the + * type-check operations throw an exception on failure. + */ + + range = DeclareExceptionRange(envPtr, CATCH_EXCEPTION_RANGE); + start = 0; + TclEmitInstInt4( INST_BEGIN_CATCH4, range, envPtr); + ExceptionRangeStarts(envPtr, range); + + /* + * Issue the type-check itself for the specific type. + */ + + switch (t) { + case TypeBool: + TclEmitOpcode( INST_DUP, envPtr); + TclEmitOpcode( INST_LNOT, envPtr); + TclEmitOpcode( INST_POP, envPtr); + break; + case TypeBoolFalse: + TclEmitOpcode( INST_DUP, envPtr); + start = CurrentOffset(envPtr); + TclEmitInstInt1( INST_JUMP_TRUE1, 0, envPtr); + break; + case TypeBoolTrue: + TclEmitOpcode( INST_DUP, envPtr); + start = CurrentOffset(envPtr); + TclEmitInstInt1( INST_JUMP_FALSE1, 0, envPtr); + break; + case TypeFloat: + /* + * Careful! Preserve behavior of NaN which is a double (that is, true + * for the purposes of a type check) but most math ops fail on it. The + * key is that it is not == to itself (and is the only value which + * this is true for). + */ + + TclEmitOpcode( INST_DUP, envPtr); + TclEmitOpcode( INST_DUP, envPtr); + TclEmitOpcode( INST_NEQ, envPtr); + TclEmitInstInt1( INST_JUMP_TRUE1, 5, envPtr); + + /* + * Type check for all other double values. + */ + + TclEmitOpcode( INST_DUP, envPtr); + TclEmitOpcode( INST_UMINUS, envPtr); + TclEmitOpcode( INST_POP, envPtr); + break; + case TypeInteger: + TclEmitOpcode( INST_DUP, envPtr); + TclEmitOpcode( INST_BITNOT, envPtr); + TclEmitOpcode( INST_POP, envPtr); + break; + case TypeNarrowInt: + Tcl_Panic("not yet implemented"); + case TypeWideInt: + Tcl_Panic("not yet implemented"); + case TypeList: + TclEmitOpcode( INST_DUP, envPtr); + TclEmitOpcode( INST_LIST_LENGTH, envPtr); + TclEmitOpcode( INST_POP, envPtr); + break; + } + + /* + * Based on whether the exception was thrown (or conditional branch taken, + * in the case of true/false checks), push the correct boolean value. This + * is also where we deal with what happens with empty values in non-strict + * mode. + */ + + ExceptionRangeEnds(envPtr, range); + TclEmitOpcode( INST_END_CATCH, envPtr); + TclEmitOpcode( INST_POP, envPtr); + PushLiteral(envPtr, "1", 1); + TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, &jumpFixup); + ExceptionRangeTarget(envPtr, range, catchOffset); + if (start != 0) { + TclStoreInt1AtPtr(CurrentOffset(envPtr) - start, + envPtr->codeStart + start + 1); + } + TclEmitOpcode( INST_END_CATCH, envPtr); + if (allowEmpty) { + PushLiteral(envPtr, "", 0); + TclEmitOpcode( INST_STR_EQ, envPtr); + } else { + TclEmitOpcode( INST_POP, envPtr); + PushLiteral(envPtr, "0", 1); + } + TclFixupForwardJumpToHere(envPtr, &jumpFixup, 127); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * * TclCompileStringMatchCmd -- * * Procedure called to compile the simplest and most common form of the diff --git a/generic/tclInt.h b/generic/tclInt.h index 1fffa1f..e513a6e 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3647,6 +3647,9 @@ MODULE_SCOPE int TclCompileStringFirstCmd(Tcl_Interp *interp, MODULE_SCOPE int TclCompileStringIndexCmd(Tcl_Interp *interp, Tcl_Parse *parsePtr, Command *cmdPtr, struct CompileEnv *envPtr); +MODULE_SCOPE int TclCompileStringIsCmd(Tcl_Interp *interp, + Tcl_Parse *parsePtr, Command *cmdPtr, + struct CompileEnv *envPtr); MODULE_SCOPE int TclCompileStringLenCmd(Tcl_Interp *interp, Tcl_Parse *parsePtr, Command *cmdPtr, struct CompileEnv *envPtr); -- cgit v0.12 From 430c9bc178a61061a4e2a4bdc1552f31f936f306 Mon Sep 17 00:00:00 2001 From: dkf Date: Thu, 2 Jan 2014 15:30:53 +0000 Subject: redevelop code to have more in common with the interpreted [string is] and to remove non-working types --- generic/tclCompCmdsSZ.c | 147 +++++++++++++++++++++++++----------------------- 1 file changed, 78 insertions(+), 69 deletions(-) diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 440b5bf..06cca50 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -435,56 +435,59 @@ TclCompileStringIsCmd( { DefineLineInformation; /* TIP #280 */ Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr); - int numWords = parsePtr->numWords; - enum IsType { - TypeBool, TypeBoolFalse, TypeBoolTrue, - TypeFloat, - TypeInteger, TypeNarrowInt, TypeWideInt, - TypeList /*, TypeDict */ + static const char *const isClasses[] = { + "alnum", "alpha", "ascii", "control", + "boolean", "digit", "double", "entier", + "false", "graph", "integer", "list", + "lower", "print", "punct", "space", + "true", "upper", "wideinteger", "wordchar", + "xdigit", NULL + }; + enum isClasses { + STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL, + STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_ENTIER, + STR_IS_FALSE, STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, + STR_IS_LOWER, STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, + STR_IS_TRUE, STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, + STR_IS_XDIGIT }; - enum IsType t; JumpFixup jumpFixup; - int start, range; - int allowEmpty = 0; + int t, range, allowEmpty = 0; + Tcl_Obj *isClass; - if (numWords < 2 || tokenPtr->type != TCL_TOKEN_SIMPLE_WORD) { + if (parsePtr->numWords < 3 || parsePtr->numWords > 6) { return TCL_ERROR; } -#define GotLiteral(tokenPtr,word) \ - ((tokenPtr)[1].size > 1 && (tokenPtr)[1].start[0] == word[0] && \ - strncmp((tokenPtr)[1].start, (word), (tokenPtr)[1].size) == 0) - - if (GotLiteral(tokenPtr, "boolean")) { - t = TypeBool; - } else if (GotLiteral(tokenPtr, "double")) { - t = TypeFloat; - } else if (GotLiteral(tokenPtr, "entier")) { - t = TypeInteger; - } else if (GotLiteral(tokenPtr, "false")) { - t = TypeBoolFalse; - } else if (GotLiteral(tokenPtr, "integer")) { - t = TypeNarrowInt; - return TCL_ERROR; // Not yet implemented - } else if (GotLiteral(tokenPtr, "list")) { - t = TypeList; - } else if (GotLiteral(tokenPtr, "true")) { - t = TypeBoolTrue; - } else if (GotLiteral(tokenPtr, "wideinteger")) { - t = TypeWideInt; - return TCL_ERROR; // Not yet implemented - } else { - /* - * We don't handle character class checks in bytecode currently. - */ - + isClass = Tcl_NewObj(); + if (!TclWordKnownAtCompileTime(tokenPtr, isClass)) { + Tcl_DecrRefCount(isClass); return TCL_ERROR; + } else if (Tcl_GetIndexFromObj(interp, isClass, isClasses, "class", 0, + &t) != TCL_OK) { + Tcl_DecrRefCount(isClass); + TclCompileSyntaxError(interp, envPtr); + return TCL_OK; } - if (numWords != 3 && numWords != 4) { + Tcl_DecrRefCount(isClass); + +#define GotLiteral(tokenPtr, word) \ + ((tokenPtr)->type == TCL_TOKEN_SIMPLE_WORD && \ + (tokenPtr)[1].size > 1 && \ + (tokenPtr)[1].start[0] == word[0] && \ + strncmp((tokenPtr)[1].start, (word), (tokenPtr)[1].size) == 0) + + /* + * Cannot handle the -failindex option at all, and that's the only legal + * way to have more than 4 arguments. + */ + + if (parsePtr->numWords != 3 && parsePtr->numWords != 4) { return TCL_ERROR; } + tokenPtr = TokenAfter(tokenPtr); - if (numWords == 3) { - allowEmpty = (t != TypeList); + if (parsePtr->numWords == 3) { + allowEmpty = (t != STR_IS_LIST); } else { if (!GotLiteral(tokenPtr, "-strict")) { return TCL_ERROR; @@ -494,18 +497,47 @@ TclCompileStringIsCmd( #undef GotLiteral /* + * Some types are not currently handled. Character classes are a prime + * example of this. + */ + + switch (t) { + case STR_IS_ALNUM: + case STR_IS_ALPHA: + case STR_IS_ASCII: + case STR_IS_CONTROL: + case STR_IS_DIGIT: + case STR_IS_GRAPH: + case STR_IS_LOWER: + case STR_IS_PRINT: + case STR_IS_PUNCT: + case STR_IS_SPACE: + case STR_IS_UPPER: + case STR_IS_WORD: + case STR_IS_XDIGIT: + return TCL_ERROR; + + case STR_IS_BOOL: + case STR_IS_FALSE: + case STR_IS_INT: + case STR_IS_TRUE: + case STR_IS_WIDE: + /* Not yet implemented */ + return TCL_ERROR; + } + + /* * Push the word to check. */ - CompileWord(envPtr, tokenPtr, interp, numWords-1); + CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); /* * Next, do the type check. First, we push a catch range; most of the * type-check operations throw an exception on failure. */ - range = DeclareExceptionRange(envPtr, CATCH_EXCEPTION_RANGE); - start = 0; + range = TclCreateExceptRange(CATCH_EXCEPTION_RANGE, envPtr); TclEmitInstInt4( INST_BEGIN_CATCH4, range, envPtr); ExceptionRangeStarts(envPtr, range); @@ -514,22 +546,7 @@ TclCompileStringIsCmd( */ switch (t) { - case TypeBool: - TclEmitOpcode( INST_DUP, envPtr); - TclEmitOpcode( INST_LNOT, envPtr); - TclEmitOpcode( INST_POP, envPtr); - break; - case TypeBoolFalse: - TclEmitOpcode( INST_DUP, envPtr); - start = CurrentOffset(envPtr); - TclEmitInstInt1( INST_JUMP_TRUE1, 0, envPtr); - break; - case TypeBoolTrue: - TclEmitOpcode( INST_DUP, envPtr); - start = CurrentOffset(envPtr); - TclEmitInstInt1( INST_JUMP_FALSE1, 0, envPtr); - break; - case TypeFloat: + case STR_IS_DOUBLE: /* * Careful! Preserve behavior of NaN which is a double (that is, true * for the purposes of a type check) but most math ops fail on it. The @@ -550,16 +567,12 @@ TclCompileStringIsCmd( TclEmitOpcode( INST_UMINUS, envPtr); TclEmitOpcode( INST_POP, envPtr); break; - case TypeInteger: + case STR_IS_ENTIER: TclEmitOpcode( INST_DUP, envPtr); TclEmitOpcode( INST_BITNOT, envPtr); TclEmitOpcode( INST_POP, envPtr); break; - case TypeNarrowInt: - Tcl_Panic("not yet implemented"); - case TypeWideInt: - Tcl_Panic("not yet implemented"); - case TypeList: + case STR_IS_LIST: TclEmitOpcode( INST_DUP, envPtr); TclEmitOpcode( INST_LIST_LENGTH, envPtr); TclEmitOpcode( INST_POP, envPtr); @@ -579,10 +592,6 @@ TclCompileStringIsCmd( PushLiteral(envPtr, "1", 1); TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, &jumpFixup); ExceptionRangeTarget(envPtr, range, catchOffset); - if (start != 0) { - TclStoreInt1AtPtr(CurrentOffset(envPtr) - start, - envPtr->codeStart + start + 1); - } TclEmitOpcode( INST_END_CATCH, envPtr); if (allowEmpty) { PushLiteral(envPtr, "", 0); -- cgit v0.12 From 29ae06c2d37c62b89dbc679c87cffd292b077cbf Mon Sep 17 00:00:00 2001 From: dkf Date: Thu, 9 Jan 2014 10:49:04 +0000 Subject: use compact form --- generic/tclCompCmdsSZ.c | 51 ++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 06cca50..345dd9f 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -451,8 +451,7 @@ TclCompileStringIsCmd( STR_IS_TRUE, STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT }; - JumpFixup jumpFixup; - int t, range, allowEmpty = 0; + int t, range, allowEmpty = 0, end; Tcl_Obj *isClass; if (parsePtr->numWords < 3 || parsePtr->numWords > 6) { @@ -538,7 +537,7 @@ TclCompileStringIsCmd( */ range = TclCreateExceptRange(CATCH_EXCEPTION_RANGE, envPtr); - TclEmitInstInt4( INST_BEGIN_CATCH4, range, envPtr); + OP4( BEGIN_CATCH4, range); ExceptionRangeStarts(envPtr, range); /* @@ -554,28 +553,28 @@ TclCompileStringIsCmd( * this is true for). */ - TclEmitOpcode( INST_DUP, envPtr); - TclEmitOpcode( INST_DUP, envPtr); - TclEmitOpcode( INST_NEQ, envPtr); - TclEmitInstInt1( INST_JUMP_TRUE1, 5, envPtr); + OP( DUP); + OP( DUP); + OP( NEQ); + OP1( JUMP_TRUE1, 5); /* * Type check for all other double values. */ - TclEmitOpcode( INST_DUP, envPtr); - TclEmitOpcode( INST_UMINUS, envPtr); - TclEmitOpcode( INST_POP, envPtr); + OP( DUP); + OP( UMINUS); + OP( POP); break; case STR_IS_ENTIER: - TclEmitOpcode( INST_DUP, envPtr); - TclEmitOpcode( INST_BITNOT, envPtr); - TclEmitOpcode( INST_POP, envPtr); + OP( DUP); + OP( BITNOT); + OP( POP); break; case STR_IS_LIST: - TclEmitOpcode( INST_DUP, envPtr); - TclEmitOpcode( INST_LIST_LENGTH, envPtr); - TclEmitOpcode( INST_POP, envPtr); + OP( DUP); + OP( LIST_LENGTH); + OP( POP); break; } @@ -587,20 +586,20 @@ TclCompileStringIsCmd( */ ExceptionRangeEnds(envPtr, range); - TclEmitOpcode( INST_END_CATCH, envPtr); - TclEmitOpcode( INST_POP, envPtr); - PushLiteral(envPtr, "1", 1); - TclEmitForwardJump(envPtr, TCL_UNCONDITIONAL_JUMP, &jumpFixup); + OP( END_CATCH); + OP( POP); + PUSH( "1"); + JUMP1( JUMP, end); ExceptionRangeTarget(envPtr, range, catchOffset); - TclEmitOpcode( INST_END_CATCH, envPtr); + OP( END_CATCH); if (allowEmpty) { - PushLiteral(envPtr, "", 0); - TclEmitOpcode( INST_STR_EQ, envPtr); + PUSH( ""); + OP( STR_EQ); } else { - TclEmitOpcode( INST_POP, envPtr); - PushLiteral(envPtr, "0", 1); + OP( POP); + PUSH( "0"); } - TclFixupForwardJumpToHere(envPtr, &jumpFixup, 127); + FIXJUMP1( end); return TCL_OK; } -- cgit v0.12 From 2803a8a05559eeba811d825d94efcb3acc1c9eb5 Mon Sep 17 00:00:00 2001 From: dkf Date: Fri, 10 Jan 2014 15:58:41 +0000 Subject: a different approach --- generic/tclAssembly.c | 4 +- generic/tclCompCmdsSZ.c | 150 ++++++++++++++++++++++++++---------------------- generic/tclCompile.c | 4 ++ generic/tclCompile.h | 4 +- generic/tclExecute.c | 8 +++ 5 files changed, 98 insertions(+), 72 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index 89c286a..70379c6 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -437,6 +437,7 @@ static const TalInstDesc TalInstructionTable[] = { {"nop", ASSEM_1BYTE, INST_NOP, 0, 0}, {"not", ASSEM_1BYTE, INST_LNOT, 1, 1}, {"nsupvar", ASSEM_LVT4, INST_NSUPVAR, 2, 1}, + {"numericType", ASSEM_1BYTE, INST_NUM_TYPE, 1, 1}, {"originCmd", ASSEM_1BYTE, INST_ORIGIN_COMMAND, 1, 1}, {"over", ASSEM_OVER, INST_OVER, INT_MIN,-1-1}, {"pop", ASSEM_1BYTE, INST_POP, 1, 0}, @@ -516,7 +517,8 @@ static const unsigned char NonThrowingByteCodes[] = { INST_RESOLVE_COMMAND, /* 154 */ INST_STR_TRIM, INST_STR_TRIM_LEFT, INST_STR_TRIM_RIGHT, /* 166-168 */ INST_CONCAT_STK, /* 169 */ - INST_STR_UPPER, INST_STR_LOWER, INST_STR_TITLE /* 170-172 */ + INST_STR_UPPER, INST_STR_LOWER, INST_STR_TITLE, /* 170-172 */ + INST_NUM_TYPE /* 180 */ }; /* diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 345dd9f..1436a20 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -500,7 +500,7 @@ TclCompileStringIsCmd( * example of this. */ - switch (t) { + switch ((enum isClasses) t) { case STR_IS_ALNUM: case STR_IS_ALPHA: case STR_IS_ASCII: @@ -514,93 +514,103 @@ TclCompileStringIsCmd( case STR_IS_UPPER: case STR_IS_WORD: case STR_IS_XDIGIT: + /* Not yet implemented */ return TCL_ERROR; case STR_IS_BOOL: case STR_IS_FALSE: - case STR_IS_INT: case STR_IS_TRUE: - case STR_IS_WIDE: /* Not yet implemented */ return TCL_ERROR; - } - - /* - * Push the word to check. - */ - - CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); - /* - * Next, do the type check. First, we push a catch range; most of the - * type-check operations throw an exception on failure. - */ - - range = TclCreateExceptRange(CATCH_EXCEPTION_RANGE, envPtr); - OP4( BEGIN_CATCH4, range); - ExceptionRangeStarts(envPtr, range); - - /* - * Issue the type-check itself for the specific type. - */ + case STR_IS_DOUBLE: { + int satisfied, isEmpty; - switch (t) { - case STR_IS_DOUBLE: - /* - * Careful! Preserve behavior of NaN which is a double (that is, true - * for the purposes of a type check) but most math ops fail on it. The - * key is that it is not == to itself (and is the only value which - * this is true for). - */ + CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); + if (allowEmpty) { + OP( DUP); + PUSH( ""); + OP( STR_EQ); + JUMP1( JUMP_TRUE, isEmpty); + OP( NUM_TYPE); + JUMP1( JUMP_TRUE, satisfied); + PUSH( "0"); + JUMP1( JUMP, end); + FIXJUMP1( isEmpty); + OP( POP); + FIXJUMP1( satisfied); + } else { + OP( NUM_TYPE); + JUMP1( JUMP_TRUE, satisfied); + PUSH( "0"); + JUMP1( JUMP, end); + TclAdjustStackDepth(-1, envPtr); + FIXJUMP1( satisfied); + } + PUSH( "1"); + FIXJUMP1( end); + return TCL_OK; + } - OP( DUP); - OP( DUP); - OP( NEQ); - OP1( JUMP_TRUE1, 5); + case STR_IS_INT: + case STR_IS_WIDE: + case STR_IS_ENTIER: + CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); + if (allowEmpty) { + int testNumType; + + OP( DUP); + OP( NUM_TYPE); + OP( DUP); + JUMP1( JUMP_TRUE, testNumType); + OP( POP); + PUSH( ""); + OP( STR_EQ); + JUMP1( JUMP, end); + TclAdjustStackDepth(1, envPtr); + FIXJUMP1( testNumType); + OP4( REVERSE, 2); + OP( POP); + } else { + OP( NUM_TYPE); + OP( DUP); + JUMP1( JUMP_FALSE, end); + } - /* - * Type check for all other double values. - */ + switch (t) { + case STR_IS_INT: + PUSH( "1"); + OP( EQ); + break; + case STR_IS_WIDE: + PUSH( "2"); + OP( LE); + break; + case STR_IS_ENTIER: + PUSH( "3"); + OP( LE); + break; + } + FIXJUMP1( end); + return TCL_OK; - OP( DUP); - OP( UMINUS); - OP( POP); - break; - case STR_IS_ENTIER: - OP( DUP); - OP( BITNOT); - OP( POP); - break; case STR_IS_LIST: + CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); + range = TclCreateExceptRange(CATCH_EXCEPTION_RANGE, envPtr); + OP4( BEGIN_CATCH4, range); + ExceptionRangeStarts(envPtr, range); OP( DUP); OP( LIST_LENGTH); OP( POP); - break; - } - - /* - * Based on whether the exception was thrown (or conditional branch taken, - * in the case of true/false checks), push the correct boolean value. This - * is also where we deal with what happens with empty values in non-strict - * mode. - */ - - ExceptionRangeEnds(envPtr, range); - OP( END_CATCH); - OP( POP); - PUSH( "1"); - JUMP1( JUMP, end); - ExceptionRangeTarget(envPtr, range, catchOffset); - OP( END_CATCH); - if (allowEmpty) { - PUSH( ""); - OP( STR_EQ); - } else { + ExceptionRangeEnds(envPtr, range); + ExceptionRangeTarget(envPtr, range, catchOffset); OP( POP); - PUSH( "0"); + OP( PUSH_RETURN_CODE); + OP( END_CATCH); + OP( LNOT); + return TCL_OK; } - FIXJUMP1( end); - return TCL_OK; + return TCL_ERROR; } int diff --git a/generic/tclCompile.c b/generic/tclCompile.c index ee67e24..c01571f 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -621,6 +621,10 @@ InstructionDesc const tclInstructionTable[] = { /* Push the identity of the current TclOO object (i.e., the name of * its current public access command) on the stack. */ + {"numericType", 1, 0, 0, {OPERAND_NONE}}, + /* Pushes the numeric type code of the word at the top of the stack. + * Stack: ... value => ... typeCode */ + {NULL, 0, 0, 0, {OPERAND_NONE}} }; diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 6ecadf4..6bf5daf 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -792,8 +792,10 @@ typedef struct ByteCode { #define INST_TCLOO_NEXT 179 +#define INST_NUM_TYPE 180 + /* The last opcode */ -#define LAST_INST_OPCODE 179 +#define LAST_INST_OPCODE 180 /* * Table describing the Tcl bytecode instructions: their name (for displaying diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 5b42124..2707ec1 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5776,6 +5776,14 @@ TEBCresume( int type1, type2; long l1, l2, lResult; + case INST_NUM_TYPE: + if (GetNumberFromObj(NULL, OBJ_AT_TOS, &ptr1, &type1) != TCL_OK) { + type1 = 0; + } + TclNewIntObj(objResultPtr, type1); + TRACE(("\"%.20s\" => %d\n", O2S(OBJ_AT_TOS), type1)); + NEXT_INST_F(1, 1, 1); + case INST_EQ: case INST_NEQ: case INST_LT: -- cgit v0.12 From 12945e9a1687504978a49e6a8739ea3f1b20b02e Mon Sep 17 00:00:00 2001 From: dkf Date: Mon, 13 Jan 2014 08:06:43 +0000 Subject: extend [string is] to booleans --- generic/tclAssembly.c | 1 + generic/tclCompCmdsSZ.c | 51 +++++++++++++++++++++++++++++++++++++++++++++---- generic/tclCompile.c | 3 +++ generic/tclCompile.h | 3 ++- generic/tclExecute.c | 11 +++++++++++ 5 files changed, 64 insertions(+), 5 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index 70379c6..f10bca8 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -478,6 +478,7 @@ static const TalInstDesc TalInstructionTable[] = { {"tclooIsObject", ASSEM_1BYTE, INST_TCLOO_IS_OBJECT, 1, 1}, {"tclooNamespace", ASSEM_1BYTE, INST_TCLOO_NS, 1, 1}, {"tclooSelf", ASSEM_1BYTE, INST_TCLOO_SELF, 0, 1}, + {"tryCvtToBoolean", ASSEM_1BYTE, INST_TRY_CVT_TO_BOOLEAN,1, 2}, {"tryCvtToNumeric", ASSEM_1BYTE, INST_TRY_CVT_TO_NUMERIC,1, 1}, {"uminus", ASSEM_1BYTE, INST_UMINUS, 1, 1}, {"unset", ASSEM_BOOL_LVT4,INST_UNSET_SCALAR, 0, 0}, diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 1436a20..91bb94c 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -514,14 +514,57 @@ TclCompileStringIsCmd( case STR_IS_UPPER: case STR_IS_WORD: case STR_IS_XDIGIT: - /* Not yet implemented */ - return TCL_ERROR; + return TclCompileBasicMin0ArgCmd(interp, parsePtr, cmdPtr, envPtr); case STR_IS_BOOL: case STR_IS_FALSE: case STR_IS_TRUE: - /* Not yet implemented */ - return TCL_ERROR; + CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); + OP( TRY_CVT_TO_BOOLEAN); + switch (t) { + int over, over2; + + case STR_IS_BOOL: + if (allowEmpty) { + JUMP1( JUMP_TRUE, over); + PUSH( ""); + OP( STR_EQ); + JUMP1( JUMP, over2); + FIXJUMP1(over); + OP( POP); + PUSH( "1"); + FIXJUMP1(over2); + } else { + OP4( REVERSE, 2); + OP( POP); + } + return TCL_OK; + case STR_IS_TRUE: + JUMP1( JUMP_TRUE, over); + if (allowEmpty) { + PUSH( ""); + OP( STR_EQ); + } else { + OP( POP); + PUSH( "0"); + } + FIXJUMP1( over); + OP( LNOT); + OP( LNOT); + return TCL_OK; + case STR_IS_FALSE: + JUMP1( JUMP_TRUE, over); + if (allowEmpty) { + PUSH( ""); + OP( STR_NEQ); + } else { + OP( POP); + PUSH( "1"); + } + FIXJUMP1( over); + OP( LNOT); + return TCL_OK; + } case STR_IS_DOUBLE: { int satisfied, isEmpty; diff --git a/generic/tclCompile.c b/generic/tclCompile.c index c01571f..39fa241 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -624,6 +624,9 @@ InstructionDesc const tclInstructionTable[] = { {"numericType", 1, 0, 0, {OPERAND_NONE}}, /* Pushes the numeric type code of the word at the top of the stack. * Stack: ... value => ... typeCode */ + {"tryCvtToBoolean", 1, +1, 0, {OPERAND_NONE}}, + /* Try converting stktop to boolean if possible. No errors. + * Stack: ... value => ... value isStrictBool */ {NULL, 0, 0, 0, {OPERAND_NONE}} }; diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 6bf5daf..a08a93a 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -793,9 +793,10 @@ typedef struct ByteCode { #define INST_TCLOO_NEXT 179 #define INST_NUM_TYPE 180 +#define INST_TRY_CVT_TO_BOOLEAN 181 /* The last opcode */ -#define LAST_INST_OPCODE 180 +#define LAST_INST_OPCODE 181 /* * Table describing the Tcl bytecode instructions: their name (for displaying diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 2707ec1..989b7b6 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -6461,6 +6461,17 @@ TEBCresume( * ----------------------------------------------------------------- */ + case INST_TRY_CVT_TO_BOOLEAN: + valuePtr = OBJ_AT_TOS; + if (valuePtr->typePtr == &tclBooleanType) { + objResultPtr = TCONST(1); + } else { + int result = (TclSetBooleanFromAny(NULL, valuePtr) == TCL_OK); + objResultPtr = TCONST(result); + } + TRACE_WITH_OBJ(("\"%.30s\" => ", O2S(valuePtr)), objResultPtr); + NEXT_INST_F(1, 0, 1); + case INST_BREAK: /* DECACHE_STACK_INFO(); -- cgit v0.12 From 6597355ad8e93ef19d16f6fea501506b141946fb Mon Sep 17 00:00:00 2001 From: dkf Date: Wed, 29 Jan 2014 13:59:32 +0000 Subject: Compile [string is] with character classes in a non-awful way. Needs more work to make resulting bytecode disassemble nicely. --- generic/tclCompCmdsSZ.c | 99 ++++++++++++++++++++++++++++++++++++++++++++----- generic/tclCompile.c | 5 +++ generic/tclCompile.h | 37 +++++++++++++++++- generic/tclExecute.c | 19 ++++++++++ 4 files changed, 150 insertions(+), 10 deletions(-) diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 1a69a89..639b4a5 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -452,6 +452,7 @@ TclCompileStringIsCmd( STR_IS_XDIGIT }; int t, range, allowEmpty = 0, end; + InstStringClassType strClassType; Tcl_Obj *isClass; if (parsePtr->numWords < 3 || parsePtr->numWords > 6) { @@ -486,7 +487,7 @@ TclCompileStringIsCmd( tokenPtr = TokenAfter(tokenPtr); if (parsePtr->numWords == 3) { - allowEmpty = (t != STR_IS_LIST); + allowEmpty = 1; } else { if (!GotLiteral(tokenPtr, "-strict")) { return TCL_ERROR; @@ -496,30 +497,77 @@ TclCompileStringIsCmd( #undef GotLiteral /* - * Some types are not currently handled. Character classes are a prime - * example of this. + * Compile the code. There are several main classes of check here. + * 1. Character classes + * 2. Booleans + * 3. Integers + * 4. Floats + * 5. Lists */ + CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); + switch ((enum isClasses) t) { case STR_IS_ALNUM: + strClassType = STR_CLASS_ALNUM; + goto compileStrClass; case STR_IS_ALPHA: + strClassType = STR_CLASS_ALPHA; + goto compileStrClass; case STR_IS_ASCII: + strClassType = STR_CLASS_ASCII; + goto compileStrClass; case STR_IS_CONTROL: + strClassType = STR_CLASS_CONTROL; + goto compileStrClass; case STR_IS_DIGIT: + strClassType = STR_CLASS_DIGIT; + goto compileStrClass; case STR_IS_GRAPH: + strClassType = STR_CLASS_GRAPH; + goto compileStrClass; case STR_IS_LOWER: + strClassType = STR_CLASS_LOWER; + goto compileStrClass; case STR_IS_PRINT: + strClassType = STR_CLASS_PRINT; + goto compileStrClass; case STR_IS_PUNCT: + strClassType = STR_CLASS_PUNCT; + goto compileStrClass; case STR_IS_SPACE: + strClassType = STR_CLASS_SPACE; + goto compileStrClass; case STR_IS_UPPER: + strClassType = STR_CLASS_UPPER; + goto compileStrClass; case STR_IS_WORD: + strClassType = STR_CLASS_WORD; + goto compileStrClass; case STR_IS_XDIGIT: - return TclCompileBasicMin0ArgCmd(interp, parsePtr, cmdPtr, envPtr); + strClassType = STR_CLASS_XDIGIT; + compileStrClass: + if (allowEmpty) { + OP1( STR_CLASS, strClassType); + } else { + int over, over2; + + OP( DUP); + OP1( STR_CLASS, strClassType); + JUMP1( JUMP_TRUE, over); + OP( POP); + PUSH( "0"); + JUMP1( JUMP, over2); + FIXJUMP1(over); + PUSH( ""); + OP( STR_NEQ); + FIXJUMP1(over2); + } + return TCL_OK; case STR_IS_BOOL: case STR_IS_FALSE: case STR_IS_TRUE: - CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); OP( TRY_CVT_TO_BOOLEAN); switch (t) { int over, over2; @@ -569,7 +617,6 @@ TclCompileStringIsCmd( case STR_IS_DOUBLE: { int satisfied, isEmpty; - CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); if (allowEmpty) { OP( DUP); PUSH( ""); @@ -598,7 +645,6 @@ TclCompileStringIsCmd( case STR_IS_INT: case STR_IS_WIDE: case STR_IS_ENTIER: - CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); if (allowEmpty) { int testNumType; @@ -638,7 +684,6 @@ TclCompileStringIsCmd( return TCL_OK; case STR_IS_LIST: - CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); range = TclCreateExceptRange(CATCH_EXCEPTION_RANGE, envPtr); OP4( BEGIN_CATCH4, range); ExceptionRangeStarts(envPtr, range); @@ -653,7 +698,8 @@ TclCompileStringIsCmd( OP( LNOT); return TCL_OK; } - return TCL_ERROR; + + return TclCompileBasicMin0ArgCmd(interp, parsePtr, cmdPtr, envPtr); } int @@ -1171,6 +1217,41 @@ TclCompileStringToTitleCmd( } /* + * Support definitions for the [string is] compilation. + */ + +static int +UniCharIsAscii( + int character) +{ + return (character >= 0) && (character < 0x80); +} + +static int +UniCharIsHexDigit( + int character) +{ + return (character >= 0) && (character < 0x80) && isxdigit(character); +} + +StringClassDesc const tclStringClassTable[] = { + {"alnum", Tcl_UniCharIsAlnum}, + {"alpha", Tcl_UniCharIsAlpha}, + {"ascii", UniCharIsAscii}, + {"control", Tcl_UniCharIsControl}, + {"digit", Tcl_UniCharIsDigit}, + {"graph", Tcl_UniCharIsGraph}, + {"lower", Tcl_UniCharIsLower}, + {"print", Tcl_UniCharIsPrint}, + {"punct", Tcl_UniCharIsPunct}, + {"space", Tcl_UniCharIsSpace}, + {"upper", Tcl_UniCharIsUpper}, + {"word", Tcl_UniCharIsWordChar}, + {"xdigit", UniCharIsHexDigit}, + {NULL, NULL} +}; + +/* *---------------------------------------------------------------------- * * TclCompileSubstCmd -- diff --git a/generic/tclCompile.c b/generic/tclCompile.c index fdc3e26..08a7a4c 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -644,6 +644,11 @@ InstructionDesc const tclInstructionTable[] = { {"tryCvtToBoolean", 1, +1, 0, {OPERAND_NONE}}, /* Try converting stktop to boolean if possible. No errors. * Stack: ... value => ... value isStrictBool */ + {"strclass", 2, 0, 1, {OPERAND_UINT1}}, + /* See if all the characters of the given string are a member of the + * specified (by opnd) character class. Note that an empty string will + * satisfy the class check (standard definition of "all"). + * Stack: ... stringValue => ... boolean */ {NULL, 0, 0, 0, {OPERAND_NONE}} }; diff --git a/generic/tclCompile.h b/generic/tclCompile.h index d6d515d..502a2e6 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -797,9 +797,10 @@ typedef struct ByteCode { #define INST_NUM_TYPE 182 #define INST_TRY_CVT_TO_BOOLEAN 183 +#define INST_STR_CLASS 184 /* The last opcode */ -#define LAST_INST_OPCODE 183 +#define LAST_INST_OPCODE 184 /* * Table describing the Tcl bytecode instructions: their name (for displaying @@ -844,6 +845,40 @@ typedef struct InstructionDesc { MODULE_SCOPE InstructionDesc const tclInstructionTable[]; /* + * Constants used by INST_STRING_CLASS to indicate character classes. These + * correspond closely by name with what [string is] can support, but there is + * no requirement to keep the values the same. + */ + +typedef enum InstStringClassType { + STR_CLASS_ALNUM, /* Unicode alphabet or digit characters. */ + STR_CLASS_ALPHA, /* Unicode alphabet characters. */ + STR_CLASS_ASCII, /* Characters in range U+000000..U+00007F. */ + STR_CLASS_CONTROL, /* Unicode control characters. */ + STR_CLASS_DIGIT, /* Unicode digit characters. */ + STR_CLASS_GRAPH, /* Unicode printing characters, excluding + * space. */ + STR_CLASS_LOWER, /* Unicode lower-case alphabet characters. */ + STR_CLASS_PRINT, /* Unicode printing characters, including + * spaces. */ + STR_CLASS_PUNCT, /* Unicode punctuation characters. */ + STR_CLASS_SPACE, /* Unicode space characters. */ + STR_CLASS_UPPER, /* Unicode upper-case alphabet characters. */ + STR_CLASS_WORD, /* Unicode word (alphabetic, digit, connector + * punctuation) characters. */ + STR_CLASS_XDIGIT /* Characters that can be used as digits in + * hexadecimal numbers ([0-9A-Fa-f]). */ +} InstStringClassType; + +typedef struct StringClassDesc { + const char *name; /* Name of the class. */ + int (*comparator)(int); /* Function to test if a single unicode + * character is a member of the class. */ +} StringClassDesc; + +MODULE_SCOPE StringClassDesc const tclStringClassTable[]; + +/* * Compilation of some Tcl constructs such as if commands and the logical or * (||) and logical and (&&) operators in expressions requires the generation * of forward jumps. Since the PC target of these jumps isn't known when the diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 916de17..58d85e1 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5810,6 +5810,25 @@ TEBCresume( TclNewIntObj(objResultPtr, match); NEXT_INST_F(1, 2, 1); + + case INST_STR_CLASS: + opnd = TclGetInt1AtPtr(pc+1); + valuePtr = OBJ_AT_TOS; + TRACE(("%s \"%.30s\" => ", tclStringClassTable[opnd].name, + O2S(valuePtr))); + ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + match = 1; + if (length > 0) { + end = ustring1 + length; + for (p=ustring1 ; p Date: Sun, 2 Feb 2014 14:42:12 +0000 Subject: improve the disassembly --- generic/tclCompile.c | 7 ++++++- generic/tclCompile.h | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 08a7a4c..c5d0107 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -644,7 +644,7 @@ InstructionDesc const tclInstructionTable[] = { {"tryCvtToBoolean", 1, +1, 0, {OPERAND_NONE}}, /* Try converting stktop to boolean if possible. No errors. * Stack: ... value => ... value isStrictBool */ - {"strclass", 2, 0, 1, {OPERAND_UINT1}}, + {"strclass", 2, 0, 1, {OPERAND_SCLS1}}, /* See if all the characters of the given string are a member of the * specified (by opnd) character class. Note that an empty string will * satisfy the class check (standard definition of "all"). @@ -5097,6 +5097,11 @@ FormatInstruction( } Tcl_AppendPrintfToObj(bufferObj, "%%v%u ", (unsigned) opnd); break; + case OPERAND_SCLS1: + opnd = TclGetUInt1AtPtr(pc+numBytes); numBytes++; + Tcl_AppendPrintfToObj(bufferObj, "%s ", + tclStringClassTable[opnd].name); + break; case OPERAND_NONE: default: break; diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 502a2e6..5665ca9 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -825,8 +825,9 @@ typedef enum InstOperandType { * variable table. */ OPERAND_LVT4, /* Four byte unsigned index into the local * variable table. */ - OPERAND_AUX4 /* Four byte unsigned index into the aux data + OPERAND_AUX4, /* Four byte unsigned index into the aux data * table. */ + OPERAND_SCLS1 /* Index into tclStringClassTable. */ } InstOperandType; typedef struct InstructionDesc { -- cgit v0.12