From 4e8b86c24d420e998e237b3088592b048a63d606 Mon Sep 17 00:00:00 2001 From: dkf Date: Mon, 20 Jul 2009 09:26:16 +0000 Subject: Performance boost for [string is]. --- ChangeLog | 44 +++++++++++++--------- generic/tclCmdMZ.c | 105 ++++++++++++++++++++++++++++++++++------------------- 2 files changed, 94 insertions(+), 55 deletions(-) diff --git a/ChangeLog b/ChangeLog index ac774f8..f8469f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2009-07-20 Donal K. Fellows + + * generic/tclCmdMZ.c (StringIsCmd): Reorganize so that [string is] is + more efficient when parsing things that are correct, at a cost of + making the empty string test slightly more costly. With this, the cost + of doing [string is integer -strict $x] matches [catch {expr {$x+0}}] + in the successful case, and greatly outstrips it in the failing case. + 2009-07-16 Don Porter * generic/tclCmdIL.c: Removed unused variables. @@ -7,39 +15,39 @@ * generic/tclScan.c: Typo in ACCEPT_NAN configuration. - * generic/tclStrToD.c: Set floating point control register on - MIPS systems so that the gradual underflow expected by Tcl is - in effect. [Bug 2819200] + * generic/tclStrToD.c: [Bug 2819200]: Set floating point control + register on MIPS systems so that the gradual underflow expected by Tcl + is in effect. 2009-07-14 Andreas Kupries * generic/tclBasic.c (DeleteInterpProc,TclArgumentBCEnter, - TclArgumentBCRelease, TclArgumentGet): + (TclArgumentBCRelease, TclArgumentGet): * generic/tclCompile.c (EnterCmdWordIndex, TclCleanupByteCode, - TclInitCompileEnv, TclCompileScript): + (TclInitCompileEnv, TclCompileScript): * generic/tclCompile.h (ExtCmdLoc): * generic/tclExecute.c (TclExecuteByteCode): * generic/tclInt.h (ExtIndex, CFWordBC): * tests/info.test (info-39.0): Backport of some changes made to the Tcl head, to handle literal - sharing better. The code here is much simpler (trimmed down) - compared to the head as the 8.5 branch is not bytecode compiling - whole files, and doesn't compile eval'd code either. + sharing better. The code here is much simpler (trimmed down) compared + to the head as the 8.5 branch is not bytecode compiling whole files, + and doesn't compile eval'd code either. - Reworked the handling of literal command arguments in bytecode to - be saved (compiler) and used (execution) per command (See the - TCL_INVOKE_STK* instructions), and not per the whole bytecode. - This removes the problems with location data caused by literal - sharing in proc bodies. Simplified the associated datastructures - (ExtIndex is gone, as is the function EnterCmdWordIndex). + Reworked the handling of literal command arguments in bytecode to be + saved (compiler) and used (execution) per command (see the + TCL_INVOKE_STK* instructions), and not per the whole bytecode. This + removes the problems with location data caused by literal sharing in + proc bodies. Simplified the associated datastructures (ExtIndex is + gone, as is the function EnterCmdWordIndex). 2009-07-01 Pat Thoyts - * win/tclWinInt.h: Handle the GetUserName API call via the - * win/tclWin32Dll.c: tclWinProcs indirection structure. This - * win/tclWinInit.c: fixes a problem obtaining the username when - the USERNAME environment variable is unset [Bug 2806622] + * win/tclWinInt.h: [Bug 2806622]: Handle the GetUserName API call + * win/tclWin32Dll.c: via the tclWinProcs indirection structure. This + * win/tclWinInit.c: fixes a problem obtaining the username when the + USERNAME environment variable is unset. 2009-06-15 Don Porter diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 5a88a6f..aa12480 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -15,13 +15,14 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.3 2009/05/06 20:16:55 dgp Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.4 2009/07/20 09:26:16 dkf Exp $ */ #include "tclInt.h" #include "tclRegexp.h" static int UniCharIsAscii(int character); +static int UniCharIsHexDigit(int character); /* *---------------------------------------------------------------------- @@ -1383,14 +1384,14 @@ StringIsCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - const char *string1, *string2, *end, *stop; + const char *string1, *end, *stop; Tcl_UniChar ch; int (*chcomp)(int) = NULL; /* The UniChar comparison function. */ int i, failat = 0, result = 1, strict = 0, index, length1, length2; Tcl_Obj *objPtr, *failVarObj = NULL; Tcl_WideInt w; - static const char *isOptions[] = { + static const char *isClasses[] = { "alnum", "alpha", "ascii", "control", "boolean", "digit", "double", "false", "graph", "integer", "list", "lower", @@ -1398,42 +1399,50 @@ StringIsCmd( "upper", "wideinteger", "wordchar", "xdigit", NULL }; - enum isOptions { + enum isClasses { STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL, STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE, STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, STR_IS_LOWER, - STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE, + STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT }; + static const char *isOptions[] = { + "-strict", "-failindex", NULL + }; + enum isOptions { + OPT_STRICT, OPT_FAILIDX + }; if (objc < 3 || objc > 6) { Tcl_WrongNumArgs(interp, 1, objv, "class ?-strict? ?-failindex var? str"); return TCL_ERROR; } - if (Tcl_GetIndexFromObj(interp, objv[1], isOptions, "class", 0, + if (Tcl_GetIndexFromObj(interp, objv[1], isClasses, "class", 0, &index) != TCL_OK) { return TCL_ERROR; } if (objc != 3) { for (i = 2; i < objc-1; i++) { - string2 = TclGetStringFromObj(objv[i], &length2); - if ((length2 > 1) && - strncmp(string2, "-strict", (size_t) length2) == 0) { + int idx2; + + if (Tcl_GetIndexFromObj(interp, objv[i], isOptions, "option", 0, + &idx2) != TCL_OK) { + return TCL_ERROR; + } + switch ((enum isOptions) idx2) { + case OPT_STRICT: strict = 1; - } else if ((length2 > 1) && - strncmp(string2, "-failindex", (size_t)length2) == 0){ + break; + case OPT_FAILIDX: if (i+1 >= objc-1) { Tcl_WrongNumArgs(interp, 2, objv, "?-strict? ?-failindex var? str"); return TCL_ERROR; } failVarObj = objv[++i]; - } else { - Tcl_AppendResult(interp, "bad option \"", string2, - "\": must be -strict or -failindex", NULL); - return TCL_ERROR; + break; } } } @@ -1446,20 +1455,12 @@ StringIsCmd( */ objPtr = objv[objc-1]; - string1 = TclGetStringFromObj(objPtr, &length1); - if (length1 == 0 && index != STR_IS_LIST) { - if (strict) { - result = 0; - } - goto str_is_done; - } - end = string1 + length1; /* * When entering here, result == 1 and failat == 0. */ - switch ((enum isOptions) index) { + switch ((enum isClasses) index) { case STR_IS_ALNUM: chcomp = Tcl_UniCharIsAlnum; break; @@ -1473,7 +1474,12 @@ StringIsCmd( case STR_IS_TRUE: case STR_IS_FALSE: if (TCL_OK != Tcl_ConvertToType(NULL, objPtr, &tclBooleanType)) { - result = 0; + if (strict) { + result = 0; + } else { + string1 = TclGetStringFromObj(objPtr, &length1); + result = length1 == 0; + } } else if (((index == STR_IS_TRUE) && objPtr->internalRep.longValue == 0) || ((index == STR_IS_FALSE) && @@ -1497,6 +1503,14 @@ StringIsCmd( (objPtr->typePtr == &tclBignumType)) { break; } + string1 = TclGetStringFromObj(objPtr, &length1); + if (length1 == 0) { + if (strict) { + result = 0; + } + goto str_is_done; + } + end = string1 + length1; if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, (const char **) &stop, 0) != TCL_OK) { result = 0; @@ -1525,8 +1539,14 @@ StringIsCmd( } failedIntParse: + string1 = TclGetStringFromObj(objPtr, &length1); + if (length1 == 0) { + if (strict) { + result = 0; + } + goto str_is_done; + } result = 0; - if (failVarObj == NULL) { /* * Don't bother computing the failure point if we're not going to @@ -1535,6 +1555,7 @@ StringIsCmd( break; } + end = string1 + length1; if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, (const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) { if (stop == end) { @@ -1583,14 +1604,15 @@ StringIsCmd( * SetListFromAny(). */ - const char *elemStart, *nextElem, *limit; + const char *elemStart, *nextElem; int lenRemain, elemSize, hasBrace; register const char *p; - limit = string1 + length1; + string1 = TclGetStringFromObj(objPtr, &length1); + end = string1 + length1; failat = -1; for (p=string1, lenRemain=length1; lenRemain > 0; - p=nextElem, lenRemain=limit-nextElem) { + p=nextElem, lenRemain=end-nextElem) { if (TCL_ERROR == TclFindElement(NULL, p, lenRemain, &elemStart, &nextElem, &elemSize, &hasBrace)) { Tcl_Obj *tmpStr; @@ -1636,17 +1658,19 @@ StringIsCmd( chcomp = Tcl_UniCharIsWordChar; break; case STR_IS_XDIGIT: - for (; string1 < end; string1++, failat++) { - /* INTL: We assume unicode is bad for this class. */ - if ((*((unsigned char *)string1) >= 0xC0) || - !isxdigit(*(unsigned char *)string1)) { - result = 0; - break; - } - } + chcomp = UniCharIsHexDigit; break; } + if (chcomp != NULL) { + string1 = TclGetStringFromObj(objPtr, &length1); + if (length1 == 0) { + if (strict) { + result = 0; + } + goto str_is_done; + } + end = string1 + length1; for (; string1 < end; string1 += length2, failat++) { length2 = TclUtfToUniChar(string1, &ch); if (!chcomp(ch)) { @@ -1677,6 +1701,13 @@ UniCharIsAscii( { return (character >= 0) && (character < 0x80); } + +static int +UniCharIsHexDigit( + int character) +{ + return (character >= 0) && (character < 0x80) && isxdigit(character); +} /* *---------------------------------------------------------------------- -- cgit v0.12