summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2009-07-20 09:13:44 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2009-07-20 09:13:44 (GMT)
commit6662c3248a0690dd85923e868c6d4d2d0d91cf22 (patch)
treea477c541f6ecfa17894fb265ee1123edb4a3119f
parentd63bf48fae3c37de04f6f82c8bde59587e1ed2aa (diff)
downloadtcl-6662c3248a0690dd85923e868c6d4d2d0d91cf22.zip
tcl-6662c3248a0690dd85923e868c6d4d2d0d91cf22.tar.gz
tcl-6662c3248a0690dd85923e868c6d4d2d0d91cf22.tar.bz2
Performance boost for [string is].
-rw-r--r--ChangeLog8
-rw-r--r--generic/tclCmdMZ.c103
2 files changed, 75 insertions, 36 deletions
diff --git a/ChangeLog b/ChangeLog
index 9023354..31820e9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2009-07-20 Donal K. Fellows <dkf@users.sf.net>
+
+ * generic/tclCmdMZ.c (StringIsCmd): Reorganize so that [string is] is
+ more efficient when parsing things that are correct, at a cost of
+ making the empty string test slightly more costly. With this, the cost
+ of doing [string is integer -strict $x] matches [catch {expr {$x+0}}]
+ in the successful case, and greatly outstrips it in the failing case.
+
2009-07-19 Donal K. Fellows <dkf@users.sf.net>
* generic/tclOO.decls, generic/tclOO.c (Tcl_GetObjectName): Expose a
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 1fb32df..554097f 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -15,7 +15,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.187 2009/07/14 21:47:42 das Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.188 2009/07/20 09:13:44 dkf Exp $
*/
#include "tclInt.h"
@@ -32,6 +32,7 @@ static int TryPostFinal(ClientData data[], Tcl_Interp *interp,
static int TryPostHandler(ClientData data[], Tcl_Interp *interp,
int result);
static int UniCharIsAscii(int character);
+static int UniCharIsHexDigit(int character);
/*
* Default set of characters to trim in [string trim] and friends. This is a
@@ -1410,14 +1411,14 @@ StringIsCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- const char *string1, *string2, *end, *stop;
+ const char *string1, *end, *stop;
Tcl_UniChar ch;
int (*chcomp)(int) = NULL; /* The UniChar comparison function. */
int i, failat = 0, result = 1, strict = 0, index, length1, length2;
Tcl_Obj *objPtr, *failVarObj = NULL;
Tcl_WideInt w;
- static const char *const isOptions[] = {
+ static const char *const isClasses[] = {
"alnum", "alpha", "ascii", "control",
"boolean", "digit", "double", "false",
"graph", "integer", "list", "lower",
@@ -1425,42 +1426,50 @@ StringIsCmd(
"upper", "wideinteger", "wordchar", "xdigit",
NULL
};
- enum isOptions {
+ enum isClasses {
STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL,
STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE,
STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, STR_IS_LOWER,
STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE,
STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT
};
+ static const char *const isOptions[] = {
+ "-strict", "-failindex", NULL
+ };
+ enum isOptions {
+ OPT_STRICT, OPT_FAILIDX
+ };
if (objc < 3 || objc > 6) {
Tcl_WrongNumArgs(interp, 1, objv,
"class ?-strict? ?-failindex var? str");
return TCL_ERROR;
}
- if (Tcl_GetIndexFromObj(interp, objv[1], isOptions, "class", 0,
+ if (Tcl_GetIndexFromObj(interp, objv[1], isClasses, "class", 0,
&index) != TCL_OK) {
return TCL_ERROR;
}
if (objc != 3) {
for (i = 2; i < objc-1; i++) {
- string2 = TclGetStringFromObj(objv[i], &length2);
- if ((length2 > 1) &&
- strncmp(string2, "-strict", (size_t) length2) == 0) {
+ int idx2;
+
+ if (Tcl_GetIndexFromObj(interp, objv[i], isOptions, "option", 0,
+ &idx2) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ switch ((enum isOptions) idx2) {
+ case OPT_STRICT:
strict = 1;
- } else if ((length2 > 1) &&
- strncmp(string2, "-failindex", (size_t)length2) == 0){
+ break;
+ case OPT_FAILIDX:
if (i+1 >= objc-1) {
Tcl_WrongNumArgs(interp, 2, objv,
"?-strict? ?-failindex var? str");
return TCL_ERROR;
}
failVarObj = objv[++i];
- } else {
- Tcl_AppendResult(interp, "bad option \"", string2,
- "\": must be -strict or -failindex", NULL);
- return TCL_ERROR;
+ break;
}
}
}
@@ -1473,20 +1482,12 @@ StringIsCmd(
*/
objPtr = objv[objc-1];
- string1 = TclGetStringFromObj(objPtr, &length1);
- if (length1 == 0 && index != STR_IS_LIST) {
- if (strict) {
- result = 0;
- }
- goto str_is_done;
- }
- end = string1 + length1;
/*
* When entering here, result == 1 and failat == 0.
*/
- switch ((enum isOptions) index) {
+ switch ((enum isClasses) index) {
case STR_IS_ALNUM:
chcomp = Tcl_UniCharIsAlnum;
break;
@@ -1500,7 +1501,12 @@ StringIsCmd(
case STR_IS_TRUE:
case STR_IS_FALSE:
if (TCL_OK != Tcl_ConvertToType(NULL, objPtr, &tclBooleanType)) {
- result = 0;
+ if (strict) {
+ result = 0;
+ } else {
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ result = length1 == 0;
+ }
} else if (((index == STR_IS_TRUE) &&
objPtr->internalRep.longValue == 0)
|| ((index == STR_IS_FALSE) &&
@@ -1524,6 +1530,14 @@ StringIsCmd(
(objPtr->typePtr == &tclBignumType)) {
break;
}
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ if (length1 == 0) {
+ if (strict) {
+ result = 0;
+ }
+ goto str_is_done;
+ }
+ end = string1 + length1;
if (TclParseNumber(NULL, objPtr, NULL, NULL, -1,
(const char **) &stop, 0) != TCL_OK) {
result = 0;
@@ -1552,8 +1566,14 @@ StringIsCmd(
}
failedIntParse:
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ if (length1 == 0) {
+ if (strict) {
+ result = 0;
+ }
+ goto str_is_done;
+ }
result = 0;
-
if (failVarObj == NULL) {
/*
* Don't bother computing the failure point if we're not going to
@@ -1562,6 +1582,7 @@ StringIsCmd(
break;
}
+ end = string1 + length1;
if (TclParseNumber(NULL, objPtr, NULL, NULL, -1,
(const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) {
if (stop == end) {
@@ -1610,14 +1631,15 @@ StringIsCmd(
* SetListFromAny().
*/
- const char *elemStart, *nextElem, *limit;
+ const char *elemStart, *nextElem;
int lenRemain, elemSize, hasBrace;
register const char *p;
- limit = string1 + length1;
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ end = string1 + length1;
failat = -1;
for (p=string1, lenRemain=length1; lenRemain > 0;
- p=nextElem, lenRemain=limit-nextElem) {
+ p=nextElem, lenRemain=end-nextElem) {
if (TCL_ERROR == TclFindElement(NULL, p, lenRemain,
&elemStart, &nextElem, &elemSize, &hasBrace)) {
Tcl_Obj *tmpStr;
@@ -1663,17 +1685,19 @@ StringIsCmd(
chcomp = Tcl_UniCharIsWordChar;
break;
case STR_IS_XDIGIT:
- for (; string1 < end; string1++, failat++) {
- /* INTL: We assume unicode is bad for this class. */
- if ((*((unsigned char *)string1) >= 0xC0) ||
- !isxdigit(*(unsigned char *)string1)) {
- result = 0;
- break;
- }
- }
+ chcomp = UniCharIsHexDigit;
break;
}
+
if (chcomp != NULL) {
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ if (length1 == 0) {
+ if (strict) {
+ result = 0;
+ }
+ goto str_is_done;
+ }
+ end = string1 + length1;
for (; string1 < end; string1 += length2, failat++) {
length2 = TclUtfToUniChar(string1, &ch);
if (!chcomp(ch)) {
@@ -1704,6 +1728,13 @@ UniCharIsAscii(
{
return (character >= 0) && (character < 0x80);
}
+
+static int
+UniCharIsHexDigit(
+ int character)
+{
+ return (character >= 0) && (character < 0x80) && isxdigit(character);
+}
/*
*----------------------------------------------------------------------