* tests/stringComp.test: improved string test coverage

* generic/tclExecute.c (TclExecuteByteCode): merge the INST_STR_CMP and INST_STR_EQ/INST_STR_NEQ code paths. This speeds up eq/ne/[string equal] by using object-type-aware comparisons, and speeds up eq/==/ne/!= by checking for equal lengths before comparing contents.
author: hobbs <hobbs> 2010-09-25 02:25:54 (GMT)
committer: hobbs <hobbs> 2010-09-25 02:25:54 (GMT)
commit: 2dac256b72f43d6d47ba902d97c17f0a7445a12e (patch)
tree: 2cba95f3b1f1df48d659ee9f4df5343af212c3ea /generic
parent: 87af30e0e77b0228943003952af1742423ed0b6d (diff)
download: tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.zip
tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.tar.gz
tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.tar.bz2
1 files changed, 71 insertions, 97 deletions
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index a6fae67..fa792df 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -14,7 +14,7 @@
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclExecute.c,v 1.497 2010/09/22 18:37:27 msofer Exp $
+ * RCS: @(#) $Id: tclExecute.c,v 1.498 2010/09/25 02:25:54 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -4528,114 +4528,90 @@ TclExecuteByteCode(
 
     case INST_STR_EQ:
     case INST_STR_NEQ:		/* String (in)equality check */
-	/*
-	 * TODO: Consider merging into INST_STR_CMP
-	 */
-
+    case INST_STR_CMP:		/* String compare. */
+    stringCompare:
 	value2Ptr = OBJ_AT_TOS;
 	valuePtr = OBJ_UNDER_TOS;
 
 	if (valuePtr == value2Ptr) {
+	    match = 0;
+	} else {
 	    /*
-	     * On the off-chance that the objects are the same, we don't
-	     * really have to think hard about equality.
+	     * We only need to check (in)equality when we have equal length
+	     * strings.  We can use memcmp in all (n)eq cases because we
+	     * don't need to worry about lexical LE/BE variance.
 	     */
-
-	    match = (*pc == INST_STR_EQ);
-	} else {
-	    s1 = TclGetStringFromObj(valuePtr, &s1len);
-	    s2 = TclGetStringFromObj(value2Ptr, &s2len);
-	    if (s1len == s2len) {
+	    typedef int (*memCmpFn_t)(const void*, const void*, size_t);
+	    memCmpFn_t memCmpFn;
+	    int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ)
+		    || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ));
+
+	    if (TclIsPureByteArray(valuePtr)
+		    && TclIsPureByteArray(value2Ptr)) {
+		s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len);
+		s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
+		memCmpFn = memcmp;
+	    } else if (((valuePtr->typePtr == &tclStringType)
+			    && (value2Ptr->typePtr == &tclStringType))) {
 		/*
-		 * We only need to check (in)equality when we have equal
-		 * length strings.
+		 * Do a unicode-specific comparison if both of the args are of
+		 * String type. If the char length == byte length, we can do a
+		 * memcmp. In benchmark testing this proved the most efficient
+		 * check between the unicode and string comparison operations.
 		 */
 
-		if (*pc == INST_STR_NEQ) {
-		    match = (memcmp(s1, s2, s1len) != 0);
+		s1len = Tcl_GetCharLength(valuePtr);
+		s2len = Tcl_GetCharLength(value2Ptr);
+		if ((s1len == valuePtr->length)
+			&& (s2len == value2Ptr->length)) {
+		    s1 = valuePtr->bytes;
+		    s2 = value2Ptr->bytes;
+		    memCmpFn = memcmp;
 		} else {
-		    /* INST_STR_EQ */
-		    match = (memcmp(s1, s2, s1len) == 0);
+		    s1 = (char *) Tcl_GetUnicode(valuePtr);
+		    s2 = (char *) Tcl_GetUnicode(value2Ptr);
+		    if (
+#ifdef WORDS_BIGENDIAN
+			1
+#else
+			checkEq
+#endif
+			) {
+			memCmpFn = memcmp;
+			s1len *= sizeof(Tcl_UniChar);
+			s2len *= sizeof(Tcl_UniChar);
+		    } else {
+			memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
+		    }
 		}
 	    } else {
-		match = (*pc == INST_STR_NEQ);
-	    }
-	}
-
-	TRACE(("%.20s %.20s => %d\n", O2S(valuePtr),O2S(value2Ptr),match));
-
-	/*
-	 * Peep-hole optimisation: if you're about to jump, do jump from here.
-	 */
-
-	pc++;
-#ifndef TCL_COMPILE_DEBUG
-	switch (*pc) {
-	case INST_JUMP_FALSE1:
-	    NEXT_INST_F((match? 2 : TclGetInt1AtPtr(pc+1)), 2, 0);
-	case INST_JUMP_TRUE1:
-	    NEXT_INST_F((match? TclGetInt1AtPtr(pc+1) : 2), 2, 0);
-	case INST_JUMP_FALSE4:
-	    NEXT_INST_F((match? 5 : TclGetInt4AtPtr(pc+1)), 2, 0);
-	case INST_JUMP_TRUE4:
-	    NEXT_INST_F((match? TclGetInt4AtPtr(pc+1) : 5), 2, 0);
-	}
-#endif
-	objResultPtr = TCONST(match);
-	NEXT_INST_F(0, 2, 1);
-
-    stringCompare:
-    case INST_STR_CMP:		/* String compare. */
-	value2Ptr = OBJ_AT_TOS;
-	valuePtr = OBJ_UNDER_TOS;
-
-	/*
-	 * The comparison function should compare up to the minimum byte
-	 * length only.
-	 */
-
-	if (valuePtr == value2Ptr) {
-	    /*
-	     * In the pure equality case, set lengths too for the checks below
-	     * (or we could goto beyond it).
-	     */
+		/*
+		 * strcmp can't do a simple memcmp in order to handle the
+		 * special Tcl \xC0\x80 null encoding for utf-8.
+		 */
 
-	    match = s1len = s2len = 0;
-	} else if (TclIsPureByteArray(valuePtr)
-		&& TclIsPureByteArray(value2Ptr)) {
-	    s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len);
-	    s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
-	    match = memcmp(s1, s2,
-		    (size_t) ((s1len < s2len) ? s1len : s2len));
-	} else if (((valuePtr->typePtr == &tclStringType)
-		&& (value2Ptr->typePtr == &tclStringType))) {
-	    /*
-	     * Do a unicode-specific comparison if both of the args are of
-	     * String type. If the char length == byte length, we can do a
-	     * memcmp. In benchmark testing this proved the most efficient
-	     * check between the unicode and string comparison operations.
-	     */
+		s1 = TclGetStringFromObj(valuePtr, &s1len);
+		s2 = TclGetStringFromObj(value2Ptr, &s2len);
+		if (checkEq) {
+		    memCmpFn = memcmp;
+		} else {
+		    memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
+		}
+	    }
 
-	    s1len = Tcl_GetCharLength(valuePtr);
-	    s2len = Tcl_GetCharLength(value2Ptr);
-	    if ((s1len == valuePtr->length) && (s2len == value2Ptr->length)) {
-		match = memcmp(valuePtr->bytes, value2Ptr->bytes,
-			(unsigned) ((s1len < s2len) ? s1len : s2len));
+	    if (checkEq && (s1len != s2len)) {
+		match = 1;
 	    } else {
-		match = TclUniCharNcmp(Tcl_GetUnicode(valuePtr),
-			Tcl_GetUnicode(value2Ptr),
-			(unsigned) ((s1len < s2len) ? s1len : s2len));
+		/*
+		 * The comparison function should compare up to the minimum
+		 * byte length only.
+		 */
+		match = memCmpFn(s1, s2,
+			(size_t) ((s1len < s2len) ? s1len : s2len));
+		if (match == 0) {
+		    match = s1len - s2len;
+		}
 	    }
-	} else {
-	    /*
-	     * We can't do a simple memcmp in order to handle the special Tcl
-	     * \xC0\x80 null encoding for utf-8.
-	     */
-
-	    s1 = TclGetStringFromObj(valuePtr, &s1len);
-	    s2 = TclGetStringFromObj(value2Ptr, &s2len);
-	    match = TclpUtfNcmp2(s1, s2,
-		    (size_t) ((s1len < s2len) ? s1len : s2len));
 	}
 
 	/*
@@ -4643,19 +4619,17 @@ TclExecuteByteCode(
 	 * TODO: consider peephole opt.
 	 */
 
-	if (match == 0) {
-	    match = s1len - s2len;
-	}
-
 	if (*pc != INST_STR_CMP) {
 	    /*
 	     * Take care of the opcodes that goto'ed into here.
 	     */
 
 	    switch (*pc) {
+	    case INST_STR_EQ:
 	    case INST_EQ:
 		match = (match == 0);
 		break;
+	    case INST_STR_NEQ:
 	    case INST_NEQ:
 		match = (match != 0);
 		break;
author	hobbs <hobbs>	2010-09-25 02:25:54 (GMT)
committer	hobbs <hobbs>	2010-09-25 02:25:54 (GMT)
commit	2dac256b72f43d6d47ba902d97c17f0a7445a12e (patch)
tree	2cba95f3b1f1df48d659ee9f4df5343af212c3ea /generic
parent	87af30e0e77b0228943003952af1742423ed0b6d (diff)
download	tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.zip tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.tar.gz tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.tar.bz2