summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2014-01-29 13:59:32 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2014-01-29 13:59:32 (GMT)
commit6597355ad8e93ef19d16f6fea501506b141946fb (patch)
tree2091eb2c45df9f3cf0d55b3f84cabe501772869d
parentf4549b8fc40b29db66113e7a63678f2f0c45db78 (diff)
downloadtcl-6597355ad8e93ef19d16f6fea501506b141946fb.zip
tcl-6597355ad8e93ef19d16f6fea501506b141946fb.tar.gz
tcl-6597355ad8e93ef19d16f6fea501506b141946fb.tar.bz2
Compile [string is] with character classes in a non-awful way. Needs more work to make resulting bytecode disassemble nicely.
-rw-r--r--generic/tclCompCmdsSZ.c99
-rw-r--r--generic/tclCompile.c5
-rw-r--r--generic/tclCompile.h37
-rw-r--r--generic/tclExecute.c19
4 files changed, 150 insertions, 10 deletions
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index 1a69a89..639b4a5 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -452,6 +452,7 @@ TclCompileStringIsCmd(
STR_IS_XDIGIT
};
int t, range, allowEmpty = 0, end;
+ InstStringClassType strClassType;
Tcl_Obj *isClass;
if (parsePtr->numWords < 3 || parsePtr->numWords > 6) {
@@ -486,7 +487,7 @@ TclCompileStringIsCmd(
tokenPtr = TokenAfter(tokenPtr);
if (parsePtr->numWords == 3) {
- allowEmpty = (t != STR_IS_LIST);
+ allowEmpty = 1;
} else {
if (!GotLiteral(tokenPtr, "-strict")) {
return TCL_ERROR;
@@ -496,30 +497,77 @@ TclCompileStringIsCmd(
#undef GotLiteral
/*
- * Some types are not currently handled. Character classes are a prime
- * example of this.
+ * Compile the code. There are several main classes of check here.
+ * 1. Character classes
+ * 2. Booleans
+ * 3. Integers
+ * 4. Floats
+ * 5. Lists
*/
+ CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1);
+
switch ((enum isClasses) t) {
case STR_IS_ALNUM:
+ strClassType = STR_CLASS_ALNUM;
+ goto compileStrClass;
case STR_IS_ALPHA:
+ strClassType = STR_CLASS_ALPHA;
+ goto compileStrClass;
case STR_IS_ASCII:
+ strClassType = STR_CLASS_ASCII;
+ goto compileStrClass;
case STR_IS_CONTROL:
+ strClassType = STR_CLASS_CONTROL;
+ goto compileStrClass;
case STR_IS_DIGIT:
+ strClassType = STR_CLASS_DIGIT;
+ goto compileStrClass;
case STR_IS_GRAPH:
+ strClassType = STR_CLASS_GRAPH;
+ goto compileStrClass;
case STR_IS_LOWER:
+ strClassType = STR_CLASS_LOWER;
+ goto compileStrClass;
case STR_IS_PRINT:
+ strClassType = STR_CLASS_PRINT;
+ goto compileStrClass;
case STR_IS_PUNCT:
+ strClassType = STR_CLASS_PUNCT;
+ goto compileStrClass;
case STR_IS_SPACE:
+ strClassType = STR_CLASS_SPACE;
+ goto compileStrClass;
case STR_IS_UPPER:
+ strClassType = STR_CLASS_UPPER;
+ goto compileStrClass;
case STR_IS_WORD:
+ strClassType = STR_CLASS_WORD;
+ goto compileStrClass;
case STR_IS_XDIGIT:
- return TclCompileBasicMin0ArgCmd(interp, parsePtr, cmdPtr, envPtr);
+ strClassType = STR_CLASS_XDIGIT;
+ compileStrClass:
+ if (allowEmpty) {
+ OP1( STR_CLASS, strClassType);
+ } else {
+ int over, over2;
+
+ OP( DUP);
+ OP1( STR_CLASS, strClassType);
+ JUMP1( JUMP_TRUE, over);
+ OP( POP);
+ PUSH( "0");
+ JUMP1( JUMP, over2);
+ FIXJUMP1(over);
+ PUSH( "");
+ OP( STR_NEQ);
+ FIXJUMP1(over2);
+ }
+ return TCL_OK;
case STR_IS_BOOL:
case STR_IS_FALSE:
case STR_IS_TRUE:
- CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1);
OP( TRY_CVT_TO_BOOLEAN);
switch (t) {
int over, over2;
@@ -569,7 +617,6 @@ TclCompileStringIsCmd(
case STR_IS_DOUBLE: {
int satisfied, isEmpty;
- CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1);
if (allowEmpty) {
OP( DUP);
PUSH( "");
@@ -598,7 +645,6 @@ TclCompileStringIsCmd(
case STR_IS_INT:
case STR_IS_WIDE:
case STR_IS_ENTIER:
- CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1);
if (allowEmpty) {
int testNumType;
@@ -638,7 +684,6 @@ TclCompileStringIsCmd(
return TCL_OK;
case STR_IS_LIST:
- CompileWord(envPtr, tokenPtr, interp, parsePtr->numWords-1);
range = TclCreateExceptRange(CATCH_EXCEPTION_RANGE, envPtr);
OP4( BEGIN_CATCH4, range);
ExceptionRangeStarts(envPtr, range);
@@ -653,7 +698,8 @@ TclCompileStringIsCmd(
OP( LNOT);
return TCL_OK;
}
- return TCL_ERROR;
+
+ return TclCompileBasicMin0ArgCmd(interp, parsePtr, cmdPtr, envPtr);
}
int
@@ -1171,6 +1217,41 @@ TclCompileStringToTitleCmd(
}
/*
+ * Support definitions for the [string is] compilation.
+ */
+
+static int
+UniCharIsAscii(
+ int character)
+{
+ return (character >= 0) && (character < 0x80);
+}
+
+static int
+UniCharIsHexDigit(
+ int character)
+{
+ return (character >= 0) && (character < 0x80) && isxdigit(character);
+}
+
+StringClassDesc const tclStringClassTable[] = {
+ {"alnum", Tcl_UniCharIsAlnum},
+ {"alpha", Tcl_UniCharIsAlpha},
+ {"ascii", UniCharIsAscii},
+ {"control", Tcl_UniCharIsControl},
+ {"digit", Tcl_UniCharIsDigit},
+ {"graph", Tcl_UniCharIsGraph},
+ {"lower", Tcl_UniCharIsLower},
+ {"print", Tcl_UniCharIsPrint},
+ {"punct", Tcl_UniCharIsPunct},
+ {"space", Tcl_UniCharIsSpace},
+ {"upper", Tcl_UniCharIsUpper},
+ {"word", Tcl_UniCharIsWordChar},
+ {"xdigit", UniCharIsHexDigit},
+ {NULL, NULL}
+};
+
+/*
*----------------------------------------------------------------------
*
* TclCompileSubstCmd --
diff --git a/generic/tclCompile.c b/generic/tclCompile.c
index fdc3e26..08a7a4c 100644
--- a/generic/tclCompile.c
+++ b/generic/tclCompile.c
@@ -644,6 +644,11 @@ InstructionDesc const tclInstructionTable[] = {
{"tryCvtToBoolean", 1, +1, 0, {OPERAND_NONE}},
/* Try converting stktop to boolean if possible. No errors.
* Stack: ... value => ... value isStrictBool */
+ {"strclass", 2, 0, 1, {OPERAND_UINT1}},
+ /* See if all the characters of the given string are a member of the
+ * specified (by opnd) character class. Note that an empty string will
+ * satisfy the class check (standard definition of "all").
+ * Stack: ... stringValue => ... boolean */
{NULL, 0, 0, 0, {OPERAND_NONE}}
};
diff --git a/generic/tclCompile.h b/generic/tclCompile.h
index d6d515d..502a2e6 100644
--- a/generic/tclCompile.h
+++ b/generic/tclCompile.h
@@ -797,9 +797,10 @@ typedef struct ByteCode {
#define INST_NUM_TYPE 182
#define INST_TRY_CVT_TO_BOOLEAN 183
+#define INST_STR_CLASS 184
/* The last opcode */
-#define LAST_INST_OPCODE 183
+#define LAST_INST_OPCODE 184
/*
* Table describing the Tcl bytecode instructions: their name (for displaying
@@ -844,6 +845,40 @@ typedef struct InstructionDesc {
MODULE_SCOPE InstructionDesc const tclInstructionTable[];
/*
+ * Constants used by INST_STRING_CLASS to indicate character classes. These
+ * correspond closely by name with what [string is] can support, but there is
+ * no requirement to keep the values the same.
+ */
+
+typedef enum InstStringClassType {
+ STR_CLASS_ALNUM, /* Unicode alphabet or digit characters. */
+ STR_CLASS_ALPHA, /* Unicode alphabet characters. */
+ STR_CLASS_ASCII, /* Characters in range U+000000..U+00007F. */
+ STR_CLASS_CONTROL, /* Unicode control characters. */
+ STR_CLASS_DIGIT, /* Unicode digit characters. */
+ STR_CLASS_GRAPH, /* Unicode printing characters, excluding
+ * space. */
+ STR_CLASS_LOWER, /* Unicode lower-case alphabet characters. */
+ STR_CLASS_PRINT, /* Unicode printing characters, including
+ * spaces. */
+ STR_CLASS_PUNCT, /* Unicode punctuation characters. */
+ STR_CLASS_SPACE, /* Unicode space characters. */
+ STR_CLASS_UPPER, /* Unicode upper-case alphabet characters. */
+ STR_CLASS_WORD, /* Unicode word (alphabetic, digit, connector
+ * punctuation) characters. */
+ STR_CLASS_XDIGIT /* Characters that can be used as digits in
+ * hexadecimal numbers ([0-9A-Fa-f]). */
+} InstStringClassType;
+
+typedef struct StringClassDesc {
+ const char *name; /* Name of the class. */
+ int (*comparator)(int); /* Function to test if a single unicode
+ * character is a member of the class. */
+} StringClassDesc;
+
+MODULE_SCOPE StringClassDesc const tclStringClassTable[];
+
+/*
* Compilation of some Tcl constructs such as if commands and the logical or
* (||) and logical and (&&) operators in expressions requires the generation
* of forward jumps. Since the PC target of these jumps isn't known when the
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 916de17..58d85e1 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5810,6 +5810,25 @@ TEBCresume(
TclNewIntObj(objResultPtr, match);
NEXT_INST_F(1, 2, 1);
+
+ case INST_STR_CLASS:
+ opnd = TclGetInt1AtPtr(pc+1);
+ valuePtr = OBJ_AT_TOS;
+ TRACE(("%s \"%.30s\" => ", tclStringClassTable[opnd].name,
+ O2S(valuePtr)));
+ ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length);
+ match = 1;
+ if (length > 0) {
+ end = ustring1 + length;
+ for (p=ustring1 ; p<end ; p++) {
+ if (!tclStringClassTable[opnd].comparator(*p)) {
+ match = 0;
+ break;
+ }
+ }
+ }
+ TRACE_APPEND(("%d\n", match));
+ JUMP_PEEPHOLE_F(match, 2, 1);
}
case INST_STR_MATCH: