From 11fdcc3fedcd680e93a93d28bf862d388ada3f9d Mon Sep 17 00:00:00 2001 From: dkf Date: Mon, 30 Sep 2013 03:00:06 +0000 Subject: First attempt at [string trim] compilation. --- generic/tclAssembly.c | 5 +- generic/tclCmdMZ.c | 8 +-- generic/tclCompCmdsSZ.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++ generic/tclCompile.c | 11 ++++ generic/tclCompile.h | 6 ++- generic/tclExecute.c | 33 ++++++++++++ generic/tclInt.h | 9 ++++ 7 files changed, 202 insertions(+), 5 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index 946c729..659f483 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -462,6 +462,8 @@ static const TalInstDesc TalInstructionTable[] = { {"strneq", ASSEM_1BYTE, INST_STR_NEQ, 2, 1}, {"strrange", ASSEM_1BYTE, INST_STR_RANGE, 3, 1}, {"strrfind", ASSEM_1BYTE, INST_STR_FIND_LAST, 2, 1}, + {"strtrimLeft", ASSEM_1BYTE, INST_STRTRIM_LEFT, 2, 1}, + {"strtrimRight", ASSEM_1BYTE, INST_STRTRIM_RIGHT, 2, 1}, {"sub", ASSEM_1BYTE, INST_SUB, 2, 1}, {"tclooClass", ASSEM_1BYTE, INST_TCLOO_CLASS, 1, 1}, {"tclooIsObject", ASSEM_1BYTE, INST_TCLOO_IS_OBJECT, 1, 1}, @@ -502,7 +504,8 @@ static const unsigned char NonThrowingByteCodes[] = { INST_COROUTINE_NAME, /* 149 */ INST_NS_CURRENT, /* 151 */ INST_INFO_LEVEL_NUM, /* 152 */ - INST_RESOLVE_COMMAND /* 154 */ + INST_RESOLVE_COMMAND, /* 154 */ + INST_STRTRIM_LEFT, INST_STRTRIM_RIGHT /* 166,167 */ }; /* diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 5087fbb..2b5e995 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -35,6 +35,8 @@ static int UniCharIsHexDigit(int character); /* * Default set of characters to trim in [string trim] and friends. This is a * UTF-8 literal string containing all Unicode space characters [TIP #413] + * + * Synch with tclCompCmdsSZ.c */ #define DEFAULT_TRIM_SET \ @@ -3342,9 +3344,9 @@ TclInitStringCmd( {"tolower", StringLowerCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0}, {"toupper", StringUpperCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0}, {"totitle", StringTitleCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0}, - {"trim", StringTrimCmd, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0}, - {"trimleft", StringTrimLCmd, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0}, - {"trimright", StringTrimRCmd, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0}, + {"trim", StringTrimCmd, TclCompileStringTrimCmd, NULL, NULL, 0}, + {"trimleft", StringTrimLCmd, TclCompileStringTrimLCmd, NULL, NULL, 0}, + {"trimright", StringTrimRCmd, TclCompileStringTrimRCmd, NULL, NULL, 0}, {"wordend", StringEndCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, {"wordstart", StringStartCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0} diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 44cb66e..0177b2d 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -640,6 +640,141 @@ TclCompileStringRangeCmd( OP( STR_RANGE); return TCL_OK; } + +/* + * Synch with tclCmdMZ.c + */ + +#define DEFAULT_TRIM_SET \ + "\x09\x0a\x0b\x0c\x0d " /* ASCII */\ + "\xc0\x80" /* nul (U+0000) */\ + "\xc2\x85" /* next line (U+0085) */\ + "\xc2\xa0" /* non-breaking space (U+00a0) */\ + "\xe1\x9a\x80" /* ogham space mark (U+1680) */ \ + "\xe1\xa0\x8e" /* mongolian vowel separator (U+180e) */\ + "\xe2\x80\x80" /* en quad (U+2000) */\ + "\xe2\x80\x81" /* em quad (U+2001) */\ + "\xe2\x80\x82" /* en space (U+2002) */\ + "\xe2\x80\x83" /* em space (U+2003) */\ + "\xe2\x80\x84" /* three-per-em space (U+2004) */\ + "\xe2\x80\x85" /* four-per-em space (U+2005) */\ + "\xe2\x80\x86" /* six-per-em space (U+2006) */\ + "\xe2\x80\x87" /* figure space (U+2007) */\ + "\xe2\x80\x88" /* punctuation space (U+2008) */\ + "\xe2\x80\x89" /* thin space (U+2009) */\ + "\xe2\x80\x8a" /* hair space (U+200a) */\ + "\xe2\x80\x8b" /* zero width space (U+200b) */\ + "\xe2\x80\xa8" /* line separator (U+2028) */\ + "\xe2\x80\xa9" /* paragraph separator (U+2029) */\ + "\xe2\x80\xaf" /* narrow no-break space (U+202f) */\ + "\xe2\x81\x9f" /* medium mathematical space (U+205f) */\ + "\xe2\x81\xa0" /* word joiner (U+2060) */\ + "\xe3\x80\x80" /* ideographic space (U+3000) */\ + "\xef\xbb\xbf" /* zero width no-break space (U+feff) */ + +int +TclCompileStringTrimLCmd( + Tcl_Interp *interp, /* Used for error reporting. */ + Tcl_Parse *parsePtr, /* Points to a parse structure for the command + * created by Tcl_ParseCommand. */ + Command *cmdPtr, /* Points to defintion of command being + * compiled. */ + CompileEnv *envPtr) /* Holds resulting instructions. */ +{ + DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr; + + if (parsePtr->numWords != 2 && parsePtr->numWords != 3) { + return TCL_ERROR; + } + + tokenPtr = TokenAfter(parsePtr->tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 1); + if (parsePtr->numWords == 3) { + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 2); + } else { + PushLiteral(envPtr, DEFAULT_TRIM_SET, strlen(DEFAULT_TRIM_SET)); + } + OP( STRTRIM_LEFT); + return TCL_OK; +} + +int +TclCompileStringTrimRCmd( + Tcl_Interp *interp, /* Used for error reporting. */ + Tcl_Parse *parsePtr, /* Points to a parse structure for the command + * created by Tcl_ParseCommand. */ + Command *cmdPtr, /* Points to defintion of command being + * compiled. */ + CompileEnv *envPtr) /* Holds resulting instructions. */ +{ + DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr; + + if (parsePtr->numWords != 2 && parsePtr->numWords != 3) { + return TCL_ERROR; + } + + tokenPtr = TokenAfter(parsePtr->tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 1); + if (parsePtr->numWords == 3) { + tokenPtr = TokenAfter(tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 2); + } else { + PushLiteral(envPtr, DEFAULT_TRIM_SET, strlen(DEFAULT_TRIM_SET)); + } + OP( STRTRIM_RIGHT); + return TCL_OK; +} + +int +TclCompileStringTrimCmd( + Tcl_Interp *interp, /* Used for error reporting. */ + Tcl_Parse *parsePtr, /* Points to a parse structure for the command + * created by Tcl_ParseCommand. */ + Command *cmdPtr, /* Points to defintion of command being + * compiled. */ + CompileEnv *envPtr) /* Holds resulting instructions. */ +{ + DefineLineInformation; /* TIP #280 */ + Tcl_Token *tokenPtr; + Tcl_Obj *objPtr; + + if (parsePtr->numWords != 2 && parsePtr->numWords != 3) { + return TCL_ERROR; + } + + tokenPtr = TokenAfter(parsePtr->tokenPtr); + CompileWord(envPtr, tokenPtr, interp, 1); + if (parsePtr->numWords == 3) { + tokenPtr = TokenAfter(tokenPtr); + TclNewObj(objPtr); + if (TclWordKnownAtCompileTime(tokenPtr, objPtr)) { + int len; + const char *p = Tcl_GetStringFromObj(objPtr, &len); + + PushLiteral(envPtr, p, len); + OP( STRTRIM_LEFT); + PushLiteral(envPtr, p, len); + OP( STRTRIM_RIGHT); + } else { + CompileWord(envPtr, tokenPtr, interp, 2); + OP4( REVERSE, 2); + OP4( OVER, 1); + OP( STRTRIM_LEFT); + OP4( REVERSE, 2); + OP( STRTRIM_RIGHT); + } + TclDecrRefCount(objPtr); + } else { + PushLiteral(envPtr, DEFAULT_TRIM_SET, strlen(DEFAULT_TRIM_SET)); + OP( STRTRIM_LEFT); + PushLiteral(envPtr, DEFAULT_TRIM_SET, strlen(DEFAULT_TRIM_SET)); + OP( STRTRIM_RIGHT); + } + return TCL_OK; +} /* *---------------------------------------------------------------------- diff --git a/generic/tclCompile.c b/generic/tclCompile.c index d15ef3a..cdedbda 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -545,6 +545,17 @@ InstructionDesc const tclInstructionTable[] = { /* Drops an element from the auxiliary stack, popping stack elements * until the matching stack depth is reached. */ + {"strtrimLeft", 1, -1, 0, {OPERAND_NONE}}, + /* [string trimleft] core: removes the characters (designated by the + * value at the top of the stack) from the left of the string and + * pushes the resulting string. + * Stack: ... string charset => ... trimmedString */ + {"strtrimRight", 1, -1, 0, {OPERAND_NONE}}, + /* [string trimright] core: removes the characters (designated by the + * value at the top of the stack) from the right of the string and + * pushes the resulting string. + * Stack: ... string charset => ... trimmedString */ + {NULL, 0, 0, 0, {OPERAND_NONE}} }; diff --git a/generic/tclCompile.h b/generic/tclCompile.h index 5660055..08eb393 100644 --- a/generic/tclCompile.h +++ b/generic/tclCompile.h @@ -768,8 +768,12 @@ typedef struct ByteCode { #define INST_EXPAND_DROP 165 +/* For compilation of [string trim] and related */ +#define INST_STRTRIM_LEFT 166 +#define INST_STRTRIM_RIGHT 167 + /* The last opcode */ -#define LAST_INST_OPCODE 165 +#define LAST_INST_OPCODE 167 /* * Table describing the Tcl bytecode instructions: their name (for displaying diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 0ca393b..b4785bf 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5252,6 +5252,39 @@ TEBCresume( objResultPtr = TCONST(match); NEXT_INST_F(0, 2, 1); + { + const char *string1, *string2; + + case INST_STRTRIM_LEFT: + valuePtr = OBJ_UNDER_TOS; /* String */ + value2Ptr = OBJ_AT_TOS; /* TrimSet */ + string2 = TclGetStringFromObj(value2Ptr, &length2); + string1 = TclGetStringFromObj(valuePtr, &length); + match = TclTrimLeft(string1, length, string2, length2); + if (match == 0) { + TRACE_WITH_OBJ(("\"%.30s\" \"%.30s\" => ", valuePtr, value2Ptr), + valuePtr); + NEXT_INST_F(1, 1, 0); + } else { + objResultPtr = Tcl_NewStringObj(string1+match, length-match); + TRACE_WITH_OBJ(("\"%.30s\" \"%.30s\" => ", valuePtr, value2Ptr), + objResultPtr); + NEXT_INST_F(1, 2, 1); + } + case INST_STRTRIM_RIGHT: + valuePtr = OBJ_UNDER_TOS; /* String */ + value2Ptr = OBJ_AT_TOS; /* TrimSet */ + string2 = TclGetStringFromObj(value2Ptr, &length2); + string1 = TclGetStringFromObj(valuePtr, &length); + match = TclTrimRight(string1, length, string2, length2); + if (match == 0) { + NEXT_INST_F(1, 1, 0); + } else { + objResultPtr = Tcl_NewStringObj(string1, length-match); + NEXT_INST_F(1, 2, 1); + } + } + case INST_REGEXP: cflags = TclGetInt1AtPtr(pc+1); /* RE compile flages like NOCASE */ valuePtr = OBJ_AT_TOS; /* String */ diff --git a/generic/tclInt.h b/generic/tclInt.h index feea6dd..2312734 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3614,6 +3614,15 @@ MODULE_SCOPE int TclCompileStringMatchCmd(Tcl_Interp *interp, MODULE_SCOPE int TclCompileStringRangeCmd(Tcl_Interp *interp, Tcl_Parse *parsePtr, Command *cmdPtr, struct CompileEnv *envPtr); +MODULE_SCOPE int TclCompileStringTrimCmd(Tcl_Interp *interp, + Tcl_Parse *parsePtr, Command *cmdPtr, + struct CompileEnv *envPtr); +MODULE_SCOPE int TclCompileStringTrimLCmd(Tcl_Interp *interp, + Tcl_Parse *parsePtr, Command *cmdPtr, + struct CompileEnv *envPtr); +MODULE_SCOPE int TclCompileStringTrimRCmd(Tcl_Interp *interp, + Tcl_Parse *parsePtr, Command *cmdPtr, + struct CompileEnv *envPtr); MODULE_SCOPE int TclCompileSubstCmd(Tcl_Interp *interp, Tcl_Parse *parsePtr, Command *cmdPtr, struct CompileEnv *envPtr); -- cgit v0.12