summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2009-07-20 09:26:16 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2009-07-20 09:26:16 (GMT)
commit4e8b86c24d420e998e237b3088592b048a63d606 (patch)
tree0821242c98cef16c20283fd13c8aa2e1b871bf4e
parent5c9038b30eadc53a06bd92249b583ab15f71c1c7 (diff)
downloadtcl-4e8b86c24d420e998e237b3088592b048a63d606.zip
tcl-4e8b86c24d420e998e237b3088592b048a63d606.tar.gz
tcl-4e8b86c24d420e998e237b3088592b048a63d606.tar.bz2
Performance boost for [string is].
-rw-r--r--ChangeLog44
-rw-r--r--generic/tclCmdMZ.c105
2 files changed, 94 insertions, 55 deletions
diff --git a/ChangeLog b/ChangeLog
index ac774f8..f8469f6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2009-07-20 Donal K. Fellows <dkf@users.sf.net>
+
+ * generic/tclCmdMZ.c (StringIsCmd): Reorganize so that [string is] is
+ more efficient when parsing things that are correct, at a cost of
+ making the empty string test slightly more costly. With this, the cost
+ of doing [string is integer -strict $x] matches [catch {expr {$x+0}}]
+ in the successful case, and greatly outstrips it in the failing case.
+
2009-07-16 Don Porter <dgp@users.sourceforge.net>
* generic/tclCmdIL.c: Removed unused variables.
@@ -7,39 +15,39 @@
* generic/tclScan.c: Typo in ACCEPT_NAN configuration.
- * generic/tclStrToD.c: Set floating point control register on
- MIPS systems so that the gradual underflow expected by Tcl is
- in effect. [Bug 2819200]
+ * generic/tclStrToD.c: [Bug 2819200]: Set floating point control
+ register on MIPS systems so that the gradual underflow expected by Tcl
+ is in effect.
2009-07-14 Andreas Kupries <andreask@activestate.com>
* generic/tclBasic.c (DeleteInterpProc,TclArgumentBCEnter,
- TclArgumentBCRelease, TclArgumentGet):
+ (TclArgumentBCRelease, TclArgumentGet):
* generic/tclCompile.c (EnterCmdWordIndex, TclCleanupByteCode,
- TclInitCompileEnv, TclCompileScript):
+ (TclInitCompileEnv, TclCompileScript):
* generic/tclCompile.h (ExtCmdLoc):
* generic/tclExecute.c (TclExecuteByteCode):
* generic/tclInt.h (ExtIndex, CFWordBC):
* tests/info.test (info-39.0):
Backport of some changes made to the Tcl head, to handle literal
- sharing better. The code here is much simpler (trimmed down)
- compared to the head as the 8.5 branch is not bytecode compiling
- whole files, and doesn't compile eval'd code either.
+ sharing better. The code here is much simpler (trimmed down) compared
+ to the head as the 8.5 branch is not bytecode compiling whole files,
+ and doesn't compile eval'd code either.
- Reworked the handling of literal command arguments in bytecode to
- be saved (compiler) and used (execution) per command (See the
- TCL_INVOKE_STK* instructions), and not per the whole bytecode.
- This removes the problems with location data caused by literal
- sharing in proc bodies. Simplified the associated datastructures
- (ExtIndex is gone, as is the function EnterCmdWordIndex).
+ Reworked the handling of literal command arguments in bytecode to be
+ saved (compiler) and used (execution) per command (see the
+ TCL_INVOKE_STK* instructions), and not per the whole bytecode. This
+ removes the problems with location data caused by literal sharing in
+ proc bodies. Simplified the associated datastructures (ExtIndex is
+ gone, as is the function EnterCmdWordIndex).
2009-07-01 Pat Thoyts <patthoyts@users.sourceforge.net>
- * win/tclWinInt.h: Handle the GetUserName API call via the
- * win/tclWin32Dll.c: tclWinProcs indirection structure. This
- * win/tclWinInit.c: fixes a problem obtaining the username when
- the USERNAME environment variable is unset [Bug 2806622]
+ * win/tclWinInt.h: [Bug 2806622]: Handle the GetUserName API call
+ * win/tclWin32Dll.c: via the tclWinProcs indirection structure. This
+ * win/tclWinInit.c: fixes a problem obtaining the username when the
+ USERNAME environment variable is unset.
2009-06-15 Don Porter <dgp@users.sourceforge.net>
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 5a88a6f..aa12480 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -15,13 +15,14 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.3 2009/05/06 20:16:55 dgp Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.4 2009/07/20 09:26:16 dkf Exp $
*/
#include "tclInt.h"
#include "tclRegexp.h"
static int UniCharIsAscii(int character);
+static int UniCharIsHexDigit(int character);
/*
*----------------------------------------------------------------------
@@ -1383,14 +1384,14 @@ StringIsCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- const char *string1, *string2, *end, *stop;
+ const char *string1, *end, *stop;
Tcl_UniChar ch;
int (*chcomp)(int) = NULL; /* The UniChar comparison function. */
int i, failat = 0, result = 1, strict = 0, index, length1, length2;
Tcl_Obj *objPtr, *failVarObj = NULL;
Tcl_WideInt w;
- static const char *isOptions[] = {
+ static const char *isClasses[] = {
"alnum", "alpha", "ascii", "control",
"boolean", "digit", "double", "false",
"graph", "integer", "list", "lower",
@@ -1398,42 +1399,50 @@ StringIsCmd(
"upper", "wideinteger", "wordchar", "xdigit",
NULL
};
- enum isOptions {
+ enum isClasses {
STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL,
STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE,
STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, STR_IS_LOWER,
- STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE,
+ STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE,
STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT
};
+ static const char *isOptions[] = {
+ "-strict", "-failindex", NULL
+ };
+ enum isOptions {
+ OPT_STRICT, OPT_FAILIDX
+ };
if (objc < 3 || objc > 6) {
Tcl_WrongNumArgs(interp, 1, objv,
"class ?-strict? ?-failindex var? str");
return TCL_ERROR;
}
- if (Tcl_GetIndexFromObj(interp, objv[1], isOptions, "class", 0,
+ if (Tcl_GetIndexFromObj(interp, objv[1], isClasses, "class", 0,
&index) != TCL_OK) {
return TCL_ERROR;
}
if (objc != 3) {
for (i = 2; i < objc-1; i++) {
- string2 = TclGetStringFromObj(objv[i], &length2);
- if ((length2 > 1) &&
- strncmp(string2, "-strict", (size_t) length2) == 0) {
+ int idx2;
+
+ if (Tcl_GetIndexFromObj(interp, objv[i], isOptions, "option", 0,
+ &idx2) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ switch ((enum isOptions) idx2) {
+ case OPT_STRICT:
strict = 1;
- } else if ((length2 > 1) &&
- strncmp(string2, "-failindex", (size_t)length2) == 0){
+ break;
+ case OPT_FAILIDX:
if (i+1 >= objc-1) {
Tcl_WrongNumArgs(interp, 2, objv,
"?-strict? ?-failindex var? str");
return TCL_ERROR;
}
failVarObj = objv[++i];
- } else {
- Tcl_AppendResult(interp, "bad option \"", string2,
- "\": must be -strict or -failindex", NULL);
- return TCL_ERROR;
+ break;
}
}
}
@@ -1446,20 +1455,12 @@ StringIsCmd(
*/
objPtr = objv[objc-1];
- string1 = TclGetStringFromObj(objPtr, &length1);
- if (length1 == 0 && index != STR_IS_LIST) {
- if (strict) {
- result = 0;
- }
- goto str_is_done;
- }
- end = string1 + length1;
/*
* When entering here, result == 1 and failat == 0.
*/
- switch ((enum isOptions) index) {
+ switch ((enum isClasses) index) {
case STR_IS_ALNUM:
chcomp = Tcl_UniCharIsAlnum;
break;
@@ -1473,7 +1474,12 @@ StringIsCmd(
case STR_IS_TRUE:
case STR_IS_FALSE:
if (TCL_OK != Tcl_ConvertToType(NULL, objPtr, &tclBooleanType)) {
- result = 0;
+ if (strict) {
+ result = 0;
+ } else {
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ result = length1 == 0;
+ }
} else if (((index == STR_IS_TRUE) &&
objPtr->internalRep.longValue == 0)
|| ((index == STR_IS_FALSE) &&
@@ -1497,6 +1503,14 @@ StringIsCmd(
(objPtr->typePtr == &tclBignumType)) {
break;
}
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ if (length1 == 0) {
+ if (strict) {
+ result = 0;
+ }
+ goto str_is_done;
+ }
+ end = string1 + length1;
if (TclParseNumber(NULL, objPtr, NULL, NULL, -1,
(const char **) &stop, 0) != TCL_OK) {
result = 0;
@@ -1525,8 +1539,14 @@ StringIsCmd(
}
failedIntParse:
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ if (length1 == 0) {
+ if (strict) {
+ result = 0;
+ }
+ goto str_is_done;
+ }
result = 0;
-
if (failVarObj == NULL) {
/*
* Don't bother computing the failure point if we're not going to
@@ -1535,6 +1555,7 @@ StringIsCmd(
break;
}
+ end = string1 + length1;
if (TclParseNumber(NULL, objPtr, NULL, NULL, -1,
(const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) {
if (stop == end) {
@@ -1583,14 +1604,15 @@ StringIsCmd(
* SetListFromAny().
*/
- const char *elemStart, *nextElem, *limit;
+ const char *elemStart, *nextElem;
int lenRemain, elemSize, hasBrace;
register const char *p;
- limit = string1 + length1;
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ end = string1 + length1;
failat = -1;
for (p=string1, lenRemain=length1; lenRemain > 0;
- p=nextElem, lenRemain=limit-nextElem) {
+ p=nextElem, lenRemain=end-nextElem) {
if (TCL_ERROR == TclFindElement(NULL, p, lenRemain,
&elemStart, &nextElem, &elemSize, &hasBrace)) {
Tcl_Obj *tmpStr;
@@ -1636,17 +1658,19 @@ StringIsCmd(
chcomp = Tcl_UniCharIsWordChar;
break;
case STR_IS_XDIGIT:
- for (; string1 < end; string1++, failat++) {
- /* INTL: We assume unicode is bad for this class. */
- if ((*((unsigned char *)string1) >= 0xC0) ||
- !isxdigit(*(unsigned char *)string1)) {
- result = 0;
- break;
- }
- }
+ chcomp = UniCharIsHexDigit;
break;
}
+
if (chcomp != NULL) {
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ if (length1 == 0) {
+ if (strict) {
+ result = 0;
+ }
+ goto str_is_done;
+ }
+ end = string1 + length1;
for (; string1 < end; string1 += length2, failat++) {
length2 = TclUtfToUniChar(string1, &ch);
if (!chcomp(ch)) {
@@ -1677,6 +1701,13 @@ UniCharIsAscii(
{
return (character >= 0) && (character < 0x80);
}
+
+static int
+UniCharIsHexDigit(
+ int character)
+{
+ return (character >= 0) && (character < 0x80) && isxdigit(character);
+}
/*
*----------------------------------------------------------------------