diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2014-02-02 16:01:21 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2014-02-02 16:01:21 (GMT) |
commit | c574af192dda62d1c02cd81d06a59d8df7fe40d5 (patch) | |
tree | 3f87a56a73d52bfcc21f03f1894519de5ba04799 | |
parent | 767efbf47855d7873ba1dfdf48f9655f805aa270 (diff) | |
parent | fd4bc0f1066dc0146f73b108bae61e45b52a3743 (diff) | |
download | tcl-c574af192dda62d1c02cd81d06a59d8df7fe40d5.zip tcl-c574af192dda62d1c02cd81d06a59d8df7fe40d5.tar.gz tcl-c574af192dda62d1c02cd81d06a59d8df7fe40d5.tar.bz2 |
add compilation of [string is]
-rw-r--r-- | generic/tclAssembly.c | 5 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 2 | ||||
-rw-r--r-- | generic/tclCompCmdsSZ.c | 313 | ||||
-rw-r--r-- | generic/tclCompile.c | 17 | ||||
-rw-r--r-- | generic/tclCompile.h | 43 | ||||
-rw-r--r-- | generic/tclExecute.c | 38 | ||||
-rw-r--r-- | generic/tclInt.h | 3 |
7 files changed, 417 insertions, 4 deletions
diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index 55a0c3f..d1866c8 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -438,6 +438,7 @@ static const TalInstDesc TalInstructionTable[] = { {"nop", ASSEM_1BYTE, INST_NOP, 0, 0}, {"not", ASSEM_1BYTE, INST_LNOT, 1, 1}, {"nsupvar", ASSEM_LVT4, INST_NSUPVAR, 2, 1}, + {"numericType", ASSEM_1BYTE, INST_NUM_TYPE, 1, 1}, {"originCmd", ASSEM_1BYTE, INST_ORIGIN_COMMAND, 1, 1}, {"over", ASSEM_OVER, INST_OVER, INT_MIN,-1-1}, {"pop", ASSEM_1BYTE, INST_POP, 1, 0}, @@ -478,6 +479,7 @@ static const TalInstDesc TalInstructionTable[] = { {"tclooIsObject", ASSEM_1BYTE, INST_TCLOO_IS_OBJECT, 1, 1}, {"tclooNamespace", ASSEM_1BYTE, INST_TCLOO_NS, 1, 1}, {"tclooSelf", ASSEM_1BYTE, INST_TCLOO_SELF, 0, 1}, + {"tryCvtToBoolean", ASSEM_1BYTE, INST_TRY_CVT_TO_BOOLEAN,1, 2}, {"tryCvtToNumeric", ASSEM_1BYTE, INST_TRY_CVT_TO_NUMERIC,1, 1}, {"uminus", ASSEM_1BYTE, INST_UMINUS, 1, 1}, {"unset", ASSEM_BOOL_LVT4,INST_UNSET_SCALAR, 0, 0}, @@ -517,7 +519,8 @@ static const unsigned char NonThrowingByteCodes[] = { INST_RESOLVE_COMMAND, /* 154 */ INST_STR_TRIM, INST_STR_TRIM_LEFT, INST_STR_TRIM_RIGHT, /* 166-168 */ INST_CONCAT_STK, /* 169 */ - INST_STR_UPPER, INST_STR_LOWER, INST_STR_TITLE /* 170-172 */ + INST_STR_UPPER, INST_STR_LOWER, INST_STR_TITLE, /* 170-172 */ + INST_NUM_TYPE /* 180 */ }; /* diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index c02cd1a..00c9f2f 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -3332,7 +3332,7 @@ TclInitStringCmd( {"equal", StringEqualCmd, TclCompileStringEqualCmd, NULL, NULL, 0}, {"first", StringFirstCmd, TclCompileStringFirstCmd, NULL, NULL, 0}, {"index", StringIndexCmd, TclCompileStringIndexCmd, NULL, NULL, 0}, - {"is", StringIsCmd, NULL, NULL, NULL, 0}, + {"is", StringIsCmd, TclCompileStringIsCmd, NULL, NULL, 0}, {"last", StringLastCmd, TclCompileStringLastCmd, NULL, NULL, 0}, {"length", StringLenCmd, TclCompileStringLenCmd, NULL, NULL, 0}, {"map", StringMapCmd, TclCompileStringMapCmd, NULL, NULL, 0}, diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 4b14f24..e6ec0a6 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -425,6 +425,284 @@ TclCompileStringIndexCmd( } int +TclCompileStringIsCmd( + Tcl_Interp *interp, /* Used for error reporting. */ + Tcl_Parse *parsePtr, /* Points to a parse structure for the command + * created by Tcl_ParseCommand. */ + Command *cmdPtr, /* Points to defintion of command being + * compiled. */ + CompileEnv *envPtr) /* Holds resulting instructions. */ +{ + DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr); + static const char *const isClasses[] = { + "alnum", "alpha", "ascii", "control", + "boolean", "digit", "double", "entier", + "false", "graph", "integer", "list", + "lower", "print", "punct", "space", + "true", "upper", "wideinteger", "wordchar", + "xdigit", NULL + }; + enum isClasses { + STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL, + STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_ENTIER, + STR_IS_FALSE, STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, + STR_IS_LOWER, STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, + STR_IS_TRUE, STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, + STR_IS_XDIGIT + }; + int t, range, allowEmpty = 0, end; + InstStringClassType strClassType; + Tcl_Obj *isClass; + + if (parsePtr->numWords < 3 || parsePtr->numWords > 6) { + return TCL_ERROR; + } + isClass = Tcl_NewObj(); + if (!TclWordKnownAtCompileTime(tokenPtr, isClass)) { + Tcl_DecrRefCount(isClass); + return TCL_ERROR; + } else if (Tcl_GetIndexFromObj(interp, isClass, isClasses, "class", 0, + &t) != TCL_OK) { + Tcl_DecrRefCount(isClass); + TclCompileSyntaxError(interp, envPtr); + return TCL_OK; + } + Tcl_DecrRefCount(isClass); + +#define GotLiteral(tokenPtr, word) \ + ((tokenPtr)->type == TCL_TOKEN_SIMPLE_WORD && \ + (tokenPtr)[1].size > 1 && \ + (tokenPtr)[1].start[0] == word[0] && \ + strncmp((tokenPtr)[1].start, (word), (tokenPtr)[1].size) == 0) + + /* + * Cannot handle the -failindex option at all, and that's the only legal + * way to have more than 4 arguments. + */ + + if (parsePtr->numWords != 3 && parsePtr->numWords != 4) { + return TCL_ERROR; + } + + tokenPtr = TokenAfter(tokenPtr); + if (parsePtr->numWords == 3) { + allowEmpty = 1; + } else { + if (!GotLiteral(tokenPtr, "-strict")) { + return TCL_ERROR; + } + tokenPtr = TokenAfter(tokenPtr); + } +#undef GotLiteral + + /* + * Compile the code. There are several main classes of check here. + * 1. Character classes + * 2. Booleans + * 3. Integers + * 4. Floats + * 5. Lists + */ + + CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1); + + switch ((enum isClasses) t) { + case STR_IS_ALNUM: + strClassType = STR_CLASS_ALNUM; + goto compileStrClass; + case STR_IS_ALPHA: + strClassType = STR_CLASS_ALPHA; + goto compileStrClass; + case STR_IS_ASCII: + strClassType = STR_CLASS_ASCII; + goto compileStrClass; + case STR_IS_CONTROL: + strClassType = STR_CLASS_CONTROL; + goto compileStrClass; + case STR_IS_DIGIT: + strClassType = STR_CLASS_DIGIT; + goto compileStrClass; + case STR_IS_GRAPH: + strClassType = STR_CLASS_GRAPH; + goto compileStrClass; + case STR_IS_LOWER: + strClassType = STR_CLASS_LOWER; + goto compileStrClass; + case STR_IS_PRINT: + strClassType = STR_CLASS_PRINT; + goto compileStrClass; + case STR_IS_PUNCT: + strClassType = STR_CLASS_PUNCT; + goto compileStrClass; + case STR_IS_SPACE: + strClassType = STR_CLASS_SPACE; + goto compileStrClass; + case STR_IS_UPPER: + strClassType = STR_CLASS_UPPER; + goto compileStrClass; + case STR_IS_WORD: + strClassType = STR_CLASS_WORD; + goto compileStrClass; + case STR_IS_XDIGIT: + strClassType = STR_CLASS_XDIGIT; + compileStrClass: + if (allowEmpty) { + OP1( STR_CLASS, strClassType); + } else { + int over, over2; + + OP( DUP); + OP1( STR_CLASS, strClassType); + JUMP1( JUMP_TRUE, over); + OP( POP); + PUSH( "0"); + JUMP1( JUMP, over2); + FIXJUMP1(over); + PUSH( ""); + OP( STR_NEQ); + FIXJUMP1(over2); + } + return TCL_OK; + + case STR_IS_BOOL: + case STR_IS_FALSE: + case STR_IS_TRUE: + OP( TRY_CVT_TO_BOOLEAN); + switch (t) { + int over, over2; + + case STR_IS_BOOL: + if (allowEmpty) { + JUMP1( JUMP_TRUE, over); + PUSH( ""); + OP( STR_EQ); + JUMP1( JUMP, over2); + FIXJUMP1(over); + OP( POP); + PUSH( "1"); + FIXJUMP1(over2); + } else { + OP4( REVERSE, 2); + OP( POP); + } + return TCL_OK; + case STR_IS_TRUE: + JUMP1( JUMP_TRUE, over); + if (allowEmpty) { + PUSH( ""); + OP( STR_EQ); + } else { + OP( POP); + PUSH( "0"); + } + FIXJUMP1( over); + OP( LNOT); + OP( LNOT); + return TCL_OK; + case STR_IS_FALSE: + JUMP1( JUMP_TRUE, over); + if (allowEmpty) { + PUSH( ""); + OP( STR_NEQ); + } else { + OP( POP); + PUSH( "1"); + } + FIXJUMP1( over); + OP( LNOT); + return TCL_OK; + } + + case STR_IS_DOUBLE: { + int satisfied, isEmpty; + + if (allowEmpty) { + OP( DUP); + PUSH( ""); + OP( STR_EQ); + JUMP1( JUMP_TRUE, isEmpty); + OP( NUM_TYPE); + JUMP1( JUMP_TRUE, satisfied); + PUSH( "0"); + JUMP1( JUMP, end); + FIXJUMP1( isEmpty); + OP( POP); + FIXJUMP1( satisfied); + } else { + OP( NUM_TYPE); + JUMP1( JUMP_TRUE, satisfied); + PUSH( "0"); + JUMP1( JUMP, end); + TclAdjustStackDepth(-1, envPtr); + FIXJUMP1( satisfied); + } + PUSH( "1"); + FIXJUMP1( end); + return TCL_OK; + } + + case STR_IS_INT: + case STR_IS_WIDE: + case STR_IS_ENTIER: + if (allowEmpty) { + int testNumType; + + OP( DUP); + OP( NUM_TYPE); + OP( DUP); + JUMP1( JUMP_TRUE, testNumType); + OP( POP); + PUSH( ""); + OP( STR_EQ); + JUMP1( JUMP, end); + TclAdjustStackDepth(1, envPtr); + FIXJUMP1( testNumType); + OP4( REVERSE, 2); + OP( POP); + } else { + OP( NUM_TYPE); + OP( DUP); + JUMP1( JUMP_FALSE, end); + } + + switch (t) { + case STR_IS_INT: + PUSH( "1"); + OP( EQ); + break; + case STR_IS_WIDE: + PUSH( "2"); + OP( LE); + break; + case STR_IS_ENTIER: + PUSH( "3"); + OP( LE); + break; + } + FIXJUMP1( end); + return TCL_OK; + + case STR_IS_LIST: + range = TclCreateExceptRange(CATCH_EXCEPTION_RANGE, envPtr); + OP4( BEGIN_CATCH4, range); + ExceptionRangeStarts(envPtr, range); + OP( DUP); + OP( LIST_LENGTH); + OP( POP); + ExceptionRangeEnds(envPtr, range); + ExceptionRangeTarget(envPtr, range, catchOffset); + OP( POP); + OP( PUSH_RETURN_CODE); + OP( END_CATCH); + OP( LNOT); + return TCL_OK; + } + + return TclCompileBasicMin0ArgCmd(interp, parsePtr, cmdPtr, envPtr); +} + +int TclCompileStringMatchCmd( Tcl_Interp *interp, /* Used for error reporting. */ Tcl_Parse *parsePtr, /* Points to a parse structure for the command @@ -939,6 +1217,41 @@ TclCompileStringToTitleCmd( } /* + * Support definitions for the [string is] compilation. + */ + +static int +UniCharIsAscii( + int character) +{ + return (character >= 0) && (character < 0x80); +} + +static int +UniCharIsHexDigit( + int character) +{ + return (character >= 0) && (character < 0x80) && isxdigit(character); +} + +StringClassDesc const tclStringClassTable[] = { + {"alnum", Tcl_UniCharIsAlnum}, + {"alpha", Tcl_UniCharIsAlpha}, + {"ascii", UniCharIsAscii}, + {"control", Tcl_UniCharIsControl}, + {"digit", Tcl_UniCharIsDigit}, + {"graph", Tcl_UniCharIsGraph}, + {"lower", Tcl_UniCharIsLower}, + {"print", Tcl_UniCharIsPrint}, + {"punct", Tcl_UniCharIsPunct}, + {"space", Tcl_UniCharIsSpace}, + {"upper", Tcl_UniCharIsUpper}, + {"word", Tcl_UniCharIsWordChar}, + {"xdigit", UniCharIsHexDigit}, + {NULL, NULL} +}; + +/* *---------------------------------------------------------------------- * * TclCompileSubstCmd -- diff --git a/generic/tclCompile.c b/generic/tclCompile.c index f75ac83..c5d0107 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -638,6 +638,18 @@ InstructionDesc const tclInstructionTable[] = { * that are the response back on top of the stack when it resumes. * Stack: ... [list ns cmd arg1 ... argN] => ... resumeList */ + {"numericType", 1, 0, 0, {OPERAND_NONE}}, + /* Pushes the numeric type code of the word at the top of the stack. + * Stack: ... value => ... typeCode */ + {"tryCvtToBoolean", 1, +1, 0, {OPERAND_NONE}}, + /* Try converting stktop to boolean if possible. No errors. + * Stack: ... value => ... value isStrictBool */ + {"strclass", 2, 0, 1, {OPERAND_SCLS1}}, + /* See if all the characters of the given string are a member of the + * specified (by opnd) character class. Note that an empty string will + * satisfy the class check (standard definition of "all"). + * Stack: ... stringValue => ... boolean */ + {NULL, 0, 0, 0, {OPERAND_NONE}} }; @@ -5085,6 +5097,11 @@ FormatInstruction( } Tcl_AppendPrintfToObj(bufferObj, "%%v%u ", (unsigned) opnd); break; + case OPERAND_SCLS1: + opnd = TclGetUInt1AtPtr(pc+numBytes); numBytes++; + Tcl_AppendPrintfToObj(bufferObj, "%s ", + tclStringClassTable[opnd].name); + break; case OPERAND_NONE: default: break; diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 7994e2c..5665ca9 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -795,8 +795,12 @@ typedef struct ByteCode { #define INST_YIELD_TO_INVOKE 181 +#define INST_NUM_TYPE 182 +#define INST_TRY_CVT_TO_BOOLEAN 183 +#define INST_STR_CLASS 184 + /* The last opcode */ -#define LAST_INST_OPCODE 181 +#define LAST_INST_OPCODE 184 /* * Table describing the Tcl bytecode instructions: their name (for displaying @@ -821,8 +825,9 @@ typedef enum InstOperandType { * variable table. */ OPERAND_LVT4, /* Four byte unsigned index into the local * variable table. */ - OPERAND_AUX4 /* Four byte unsigned index into the aux data + OPERAND_AUX4, /* Four byte unsigned index into the aux data * table. */ + OPERAND_SCLS1 /* Index into tclStringClassTable. */ } InstOperandType; typedef struct InstructionDesc { @@ -841,6 +846,40 @@ typedef struct InstructionDesc { MODULE_SCOPE InstructionDesc const tclInstructionTable[]; /* + * Constants used by INST_STRING_CLASS to indicate character classes. These + * correspond closely by name with what [string is] can support, but there is + * no requirement to keep the values the same. + */ + +typedef enum InstStringClassType { + STR_CLASS_ALNUM, /* Unicode alphabet or digit characters. */ + STR_CLASS_ALPHA, /* Unicode alphabet characters. */ + STR_CLASS_ASCII, /* Characters in range U+000000..U+00007F. */ + STR_CLASS_CONTROL, /* Unicode control characters. */ + STR_CLASS_DIGIT, /* Unicode digit characters. */ + STR_CLASS_GRAPH, /* Unicode printing characters, excluding + * space. */ + STR_CLASS_LOWER, /* Unicode lower-case alphabet characters. */ + STR_CLASS_PRINT, /* Unicode printing characters, including + * spaces. */ + STR_CLASS_PUNCT, /* Unicode punctuation characters. */ + STR_CLASS_SPACE, /* Unicode space characters. */ + STR_CLASS_UPPER, /* Unicode upper-case alphabet characters. */ + STR_CLASS_WORD, /* Unicode word (alphabetic, digit, connector + * punctuation) characters. */ + STR_CLASS_XDIGIT /* Characters that can be used as digits in + * hexadecimal numbers ([0-9A-Fa-f]). */ +} InstStringClassType; + +typedef struct StringClassDesc { + const char *name; /* Name of the class. */ + int (*comparator)(int); /* Function to test if a single unicode + * character is a member of the class. */ +} StringClassDesc; + +MODULE_SCOPE StringClassDesc const tclStringClassTable[]; + +/* * Compilation of some Tcl constructs such as if commands and the logical or * (||) and logical and (&&) operators in expressions requires the generation * of forward jumps. Since the PC target of these jumps isn't known when the diff --git a/generic/tclExecute.c b/generic/tclExecute.c index e8bc903..ac78370 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5810,6 +5810,25 @@ TEBCresume( TclNewIntObj(objResultPtr, match); NEXT_INST_F(1, 2, 1); + + case INST_STR_CLASS: + opnd = TclGetInt1AtPtr(pc+1); + valuePtr = OBJ_AT_TOS; + TRACE(("%s \"%.30s\" => ", tclStringClassTable[opnd].name, + O2S(valuePtr))); + ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + match = 1; + if (length > 0) { + end = ustring1 + length; + for (p=ustring1 ; p<end ; p++) { + if (!tclStringClassTable[opnd].comparator(*p)) { + match = 0; + break; + } + } + } + TRACE_APPEND(("%d\n", match)); + JUMP_PEEPHOLE_F(match, 2, 1); } case INST_STR_MATCH: @@ -5944,6 +5963,14 @@ TEBCresume( int type1, type2; long l1, l2, lResult; + case INST_NUM_TYPE: + if (GetNumberFromObj(NULL, OBJ_AT_TOS, &ptr1, &type1) != TCL_OK) { + type1 = 0; + } + TclNewIntObj(objResultPtr, type1); + TRACE(("\"%.20s\" => %d\n", O2S(OBJ_AT_TOS), type1)); + NEXT_INST_F(1, 1, 1); + case INST_EQ: case INST_NEQ: case INST_LT: @@ -6621,6 +6648,17 @@ TEBCresume( * ----------------------------------------------------------------- */ + case INST_TRY_CVT_TO_BOOLEAN: + valuePtr = OBJ_AT_TOS; + if (valuePtr->typePtr == &tclBooleanType) { + objResultPtr = TCONST(1); + } else { + int result = (TclSetBooleanFromAny(NULL, valuePtr) == TCL_OK); + objResultPtr = TCONST(result); + } + TRACE_WITH_OBJ(("\"%.30s\" => ", O2S(valuePtr)), objResultPtr); + NEXT_INST_F(1, 0, 1); + case INST_BREAK: /* DECACHE_STACK_INFO(); diff --git a/generic/tclInt.h b/generic/tclInt.h index 6ddb015..a9f4c16 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3622,6 +3622,9 @@ MODULE_SCOPE int TclCompileStringFirstCmd(Tcl_Interp *interp, MODULE_SCOPE int TclCompileStringIndexCmd(Tcl_Interp *interp, Tcl_Parse *parsePtr, Command *cmdPtr, struct CompileEnv *envPtr); +MODULE_SCOPE int TclCompileStringIsCmd(Tcl_Interp *interp, + Tcl_Parse *parsePtr, Command *cmdPtr, + struct CompileEnv *envPtr); MODULE_SCOPE int TclCompileStringLastCmd(Tcl_Interp *interp, Tcl_Parse *parsePtr, Command *cmdPtr, struct CompileEnv *envPtr); |