summary | refs | log | tree | commit | diff | stats
path: root/generic
diff options
context:
space:
mode:
author: hobbs <hobbs> 2010-09-25 02:25:54 (GMT)
committer: hobbs <hobbs> 2010-09-25 02:25:54 (GMT)
commit: 2dac256b72f43d6d47ba902d97c17f0a7445a12e (patch)
tree: 2cba95f3b1f1df48d659ee9f4df5343af212c3ea /generic
parent: 87af30e0e77b0228943003952af1742423ed0b6d (diff)
download: tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.zip
tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.tar.gz
tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.tar.bz2
* tests/stringComp.test: improved string test coverage.
* generic/tclExecute.c (TclExecuteByteCode): merge the INST_STR_CMP and INST_STR_EQ/INST_STR_NEQ paths. This speeds up eq/ne/[string eq] by using obj-aware comparisons, and speeds up eq/==/ne/!= by adding a length equality check.
Diffstat (limited to 'generic')
-rw-r--r-- generic/tclExecute.c 168
1 file changed, 71 insertions, 97 deletions
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index a6fae67..fa792df 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -14,7 +14,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclExecute.c,v 1.497 2010/09/22 18:37:27 msofer Exp $
+ * RCS: @(#) $Id: tclExecute.c,v 1.498 2010/09/25 02:25:54 hobbs Exp $
*/
#include "tclInt.h"
@@ -4528,114 +4528,90 @@ TclExecuteByteCode(
case INST_STR_EQ:
case INST_STR_NEQ: /* String (in)equality check */
- /*
- * TODO: Consider merging into INST_STR_CMP
- */
-
+ case INST_STR_CMP: /* String compare. */
+ stringCompare:
value2Ptr = OBJ_AT_TOS;
valuePtr = OBJ_UNDER_TOS;
if (valuePtr == value2Ptr) {
+ match = 0;
+ } else {
/*
- * On the off-chance that the objects are the same, we don't
- * really have to think hard about equality.
+ * We only need to check (in)equality when we have equal length
+ * strings. We can use memcmp in all (n)eq cases because we
+ * don't need to worry about lexical LE/BE variance.
*/
-
- match = (*pc == INST_STR_EQ);
- } else {
- s1 = TclGetStringFromObj(valuePtr, &s1len);
- s2 = TclGetStringFromObj(value2Ptr, &s2len);
- if (s1len == s2len) {
+ typedef int (*memCmpFn_t)(const void*, const void*, size_t);
+ memCmpFn_t memCmpFn;
+ int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ)
+ || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ));
+
+ if (TclIsPureByteArray(valuePtr)
+ && TclIsPureByteArray(value2Ptr)) {
+ s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len);
+ s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
+ memCmpFn = memcmp;
+ } else if (((valuePtr->typePtr == &tclStringType)
+ && (value2Ptr->typePtr == &tclStringType))) {
/*
- * We only need to check (in)equality when we have equal
- * length strings.
+ * Do a unicode-specific comparison if both of the args are of
+ * String type. If the char length == byte length, we can do a
+ * memcmp. In benchmark testing this proved the most efficient
+ * check between the unicode and string comparison operations.
*/
- if (*pc == INST_STR_NEQ) {
- match = (memcmp(s1, s2, s1len) != 0);
+ s1len = Tcl_GetCharLength(valuePtr);
+ s2len = Tcl_GetCharLength(value2Ptr);
+ if ((s1len == valuePtr->length)
+ && (s2len == value2Ptr->length)) {
+ s1 = valuePtr->bytes;
+ s2 = value2Ptr->bytes;
+ memCmpFn = memcmp;
} else {
- /* INST_STR_EQ */
- match = (memcmp(s1, s2, s1len) == 0);
+ s1 = (char *) Tcl_GetUnicode(valuePtr);
+ s2 = (char *) Tcl_GetUnicode(value2Ptr);
+ if (
+#ifdef WORDS_BIGENDIAN
+ 1
+#else
+ checkEq
+#endif
+ ) {
+ memCmpFn = memcmp;
+ s1len *= sizeof(Tcl_UniChar);
+ s2len *= sizeof(Tcl_UniChar);
+ } else {
+ memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
+ }
}
} else {
- match = (*pc == INST_STR_NEQ);
- }
- }
-
- TRACE(("%.20s %.20s => %d\n", O2S(valuePtr),O2S(value2Ptr),match));
-
- /*
- * Peep-hole optimisation: if you're about to jump, do jump from here.
- */
-
- pc++;
-#ifndef TCL_COMPILE_DEBUG
- switch (*pc) {
- case INST_JUMP_FALSE1:
- NEXT_INST_F((match? 2 : TclGetInt1AtPtr(pc+1)), 2, 0);
- case INST_JUMP_TRUE1:
- NEXT_INST_F((match? TclGetInt1AtPtr(pc+1) : 2), 2, 0);
- case INST_JUMP_FALSE4:
- NEXT_INST_F((match? 5 : TclGetInt4AtPtr(pc+1)), 2, 0);
- case INST_JUMP_TRUE4:
- NEXT_INST_F((match? TclGetInt4AtPtr(pc+1) : 5), 2, 0);
- }
-#endif
- objResultPtr = TCONST(match);
- NEXT_INST_F(0, 2, 1);
-
- stringCompare:
- case INST_STR_CMP: /* String compare. */
- value2Ptr = OBJ_AT_TOS;
- valuePtr = OBJ_UNDER_TOS;
-
- /*
- * The comparison function should compare up to the minimum byte
- * length only.
- */
-
- if (valuePtr == value2Ptr) {
- /*
- * In the pure equality case, set lengths too for the checks below
- * (or we could goto beyond it).
- */
+ /*
+ * strcmp can't do a simple memcmp in order to handle the
+ * special Tcl \xC0\x80 null encoding for utf-8.
+ */
- match = s1len = s2len = 0;
- } else if (TclIsPureByteArray(valuePtr)
- && TclIsPureByteArray(value2Ptr)) {
- s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len);
- s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
- match = memcmp(s1, s2,
- (size_t) ((s1len < s2len) ? s1len : s2len));
- } else if (((valuePtr->typePtr == &tclStringType)
- && (value2Ptr->typePtr == &tclStringType))) {
- /*
- * Do a unicode-specific comparison if both of the args are of
- * String type. If the char length == byte length, we can do a
- * memcmp. In benchmark testing this proved the most efficient
- * check between the unicode and string comparison operations.
- */
+ s1 = TclGetStringFromObj(valuePtr, &s1len);
+ s2 = TclGetStringFromObj(value2Ptr, &s2len);
+ if (checkEq) {
+ memCmpFn = memcmp;
+ } else {
+ memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
+ }
+ }
- s1len = Tcl_GetCharLength(valuePtr);
- s2len = Tcl_GetCharLength(value2Ptr);
- if ((s1len == valuePtr->length) && (s2len == value2Ptr->length)) {
- match = memcmp(valuePtr->bytes, value2Ptr->bytes,
- (unsigned) ((s1len < s2len) ? s1len : s2len));
+ if (checkEq && (s1len != s2len)) {
+ match = 1;
} else {
- match = TclUniCharNcmp(Tcl_GetUnicode(valuePtr),
- Tcl_GetUnicode(value2Ptr),
- (unsigned) ((s1len < s2len) ? s1len : s2len));
+ /*
+ * The comparison function should compare up to the minimum
+ * byte length only.
+ */
+ match = memCmpFn(s1, s2,
+ (size_t) ((s1len < s2len) ? s1len : s2len));
+ if (match == 0) {
+ match = s1len - s2len;
+ }
}
- } else {
- /*
- * We can't do a simple memcmp in order to handle the special Tcl
- * \xC0\x80 null encoding for utf-8.
- */
-
- s1 = TclGetStringFromObj(valuePtr, &s1len);
- s2 = TclGetStringFromObj(value2Ptr, &s2len);
- match = TclpUtfNcmp2(s1, s2,
- (size_t) ((s1len < s2len) ? s1len : s2len));
}
/*
@@ -4643,19 +4619,17 @@ TclExecuteByteCode(
* TODO: consider peephole opt.
*/
- if (match == 0) {
- match = s1len - s2len;
- }
-
if (*pc != INST_STR_CMP) {
/*
* Take care of the opcodes that goto'ed into here.
*/
switch (*pc) {
+ case INST_STR_EQ:
case INST_EQ:
match = (match == 0);
break;
+ case INST_STR_NEQ:
case INST_NEQ:
match = (match != 0);
break;