summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhobbs <hobbs>2010-09-25 02:25:54 (GMT)
committerhobbs <hobbs>2010-09-25 02:25:54 (GMT)
commit2dac256b72f43d6d47ba902d97c17f0a7445a12e (patch)
tree2cba95f3b1f1df48d659ee9f4df5343af212c3ea
parent87af30e0e77b0228943003952af1742423ed0b6d (diff)
downloadtcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.zip
tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.tar.gz
tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.tar.bz2
* tests/stringComp.test: improved string test coverage
* generic/tclExecute.c (TclExecuteByteCode): merge INST_STR_CMP and INST_STR_EQ/INST_STR_NEQ paths. Speeds up eq/ne/[string eq] with obj-aware comparisons and eq/==/ne/!= with length equality check.
-rw-r--r--ChangeLog8
-rw-r--r--generic/tclExecute.c168
-rw-r--r--tests/stringComp.test320
3 files changed, 232 insertions, 264 deletions
diff --git a/ChangeLog b/ChangeLog
index 20c82ff..7163a85 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2010-09-24 Jeff Hobbs <jeffh@ActiveState.com>
+
+ * tests/stringComp.test: improved string eq/cmp test coverage
+ * generic/tclExecute.c (TclExecuteByteCode): merge INST_STR_CMP
+ and INST_STR_EQ/INST_STR_NEQ paths. Speeds up eq/ne/[string eq]
+ with obj-aware comparisons and eq/==/ne/!= with length equality
+ check.
+
2010-09-24 Andreas Kupries <andreask@activestate.com>
* tclWinsock.c: [Bug 3056775]: Fixed race condition between thread
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index a6fae67..fa792df 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -14,7 +14,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclExecute.c,v 1.497 2010/09/22 18:37:27 msofer Exp $
+ * RCS: @(#) $Id: tclExecute.c,v 1.498 2010/09/25 02:25:54 hobbs Exp $
*/
#include "tclInt.h"
@@ -4528,114 +4528,90 @@ TclExecuteByteCode(
case INST_STR_EQ:
case INST_STR_NEQ: /* String (in)equality check */
- /*
- * TODO: Consider merging into INST_STR_CMP
- */
-
+ case INST_STR_CMP: /* String compare. */
+ stringCompare:
value2Ptr = OBJ_AT_TOS;
valuePtr = OBJ_UNDER_TOS;
if (valuePtr == value2Ptr) {
+ match = 0;
+ } else {
/*
- * On the off-chance that the objects are the same, we don't
- * really have to think hard about equality.
+ * We only need to check (in)equality when we have equal length
+ * strings. We can use memcmp in all (n)eq cases because we
+ * don't need to worry about lexical LE/BE variance.
*/
-
- match = (*pc == INST_STR_EQ);
- } else {
- s1 = TclGetStringFromObj(valuePtr, &s1len);
- s2 = TclGetStringFromObj(value2Ptr, &s2len);
- if (s1len == s2len) {
+ typedef int (*memCmpFn_t)(const void*, const void*, size_t);
+ memCmpFn_t memCmpFn;
+ int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ)
+ || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ));
+
+ if (TclIsPureByteArray(valuePtr)
+ && TclIsPureByteArray(value2Ptr)) {
+ s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len);
+ s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
+ memCmpFn = memcmp;
+ } else if (((valuePtr->typePtr == &tclStringType)
+ && (value2Ptr->typePtr == &tclStringType))) {
/*
- * We only need to check (in)equality when we have equal
- * length strings.
+ * Do a unicode-specific comparison if both of the args are of
+ * String type. If the char length == byte length, we can do a
+ * memcmp. In benchmark testing this proved the most efficient
+ * check between the unicode and string comparison operations.
*/
- if (*pc == INST_STR_NEQ) {
- match = (memcmp(s1, s2, s1len) != 0);
+ s1len = Tcl_GetCharLength(valuePtr);
+ s2len = Tcl_GetCharLength(value2Ptr);
+ if ((s1len == valuePtr->length)
+ && (s2len == value2Ptr->length)) {
+ s1 = valuePtr->bytes;
+ s2 = value2Ptr->bytes;
+ memCmpFn = memcmp;
} else {
- /* INST_STR_EQ */
- match = (memcmp(s1, s2, s1len) == 0);
+ s1 = (char *) Tcl_GetUnicode(valuePtr);
+ s2 = (char *) Tcl_GetUnicode(value2Ptr);
+ if (
+#ifdef WORDS_BIGENDIAN
+ 1
+#else
+ checkEq
+#endif
+ ) {
+ memCmpFn = memcmp;
+ s1len *= sizeof(Tcl_UniChar);
+ s2len *= sizeof(Tcl_UniChar);
+ } else {
+ memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
+ }
}
} else {
- match = (*pc == INST_STR_NEQ);
- }
- }
-
- TRACE(("%.20s %.20s => %d\n", O2S(valuePtr),O2S(value2Ptr),match));
-
- /*
- * Peep-hole optimisation: if you're about to jump, do jump from here.
- */
-
- pc++;
-#ifndef TCL_COMPILE_DEBUG
- switch (*pc) {
- case INST_JUMP_FALSE1:
- NEXT_INST_F((match? 2 : TclGetInt1AtPtr(pc+1)), 2, 0);
- case INST_JUMP_TRUE1:
- NEXT_INST_F((match? TclGetInt1AtPtr(pc+1) : 2), 2, 0);
- case INST_JUMP_FALSE4:
- NEXT_INST_F((match? 5 : TclGetInt4AtPtr(pc+1)), 2, 0);
- case INST_JUMP_TRUE4:
- NEXT_INST_F((match? TclGetInt4AtPtr(pc+1) : 5), 2, 0);
- }
-#endif
- objResultPtr = TCONST(match);
- NEXT_INST_F(0, 2, 1);
-
- stringCompare:
- case INST_STR_CMP: /* String compare. */
- value2Ptr = OBJ_AT_TOS;
- valuePtr = OBJ_UNDER_TOS;
-
- /*
- * The comparison function should compare up to the minimum byte
- * length only.
- */
-
- if (valuePtr == value2Ptr) {
- /*
- * In the pure equality case, set lengths too for the checks below
- * (or we could goto beyond it).
- */
+ /*
+ * strcmp can't do a simple memcmp in order to handle the
+ * special Tcl \xC0\x80 null encoding for utf-8.
+ */
- match = s1len = s2len = 0;
- } else if (TclIsPureByteArray(valuePtr)
- && TclIsPureByteArray(value2Ptr)) {
- s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len);
- s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
- match = memcmp(s1, s2,
- (size_t) ((s1len < s2len) ? s1len : s2len));
- } else if (((valuePtr->typePtr == &tclStringType)
- && (value2Ptr->typePtr == &tclStringType))) {
- /*
- * Do a unicode-specific comparison if both of the args are of
- * String type. If the char length == byte length, we can do a
- * memcmp. In benchmark testing this proved the most efficient
- * check between the unicode and string comparison operations.
- */
+ s1 = TclGetStringFromObj(valuePtr, &s1len);
+ s2 = TclGetStringFromObj(value2Ptr, &s2len);
+ if (checkEq) {
+ memCmpFn = memcmp;
+ } else {
+ memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
+ }
+ }
- s1len = Tcl_GetCharLength(valuePtr);
- s2len = Tcl_GetCharLength(value2Ptr);
- if ((s1len == valuePtr->length) && (s2len == value2Ptr->length)) {
- match = memcmp(valuePtr->bytes, value2Ptr->bytes,
- (unsigned) ((s1len < s2len) ? s1len : s2len));
+ if (checkEq && (s1len != s2len)) {
+ match = 1;
} else {
- match = TclUniCharNcmp(Tcl_GetUnicode(valuePtr),
- Tcl_GetUnicode(value2Ptr),
- (unsigned) ((s1len < s2len) ? s1len : s2len));
+ /*
+ * The comparison function should compare up to the minimum
+ * byte length only.
+ */
+ match = memCmpFn(s1, s2,
+ (size_t) ((s1len < s2len) ? s1len : s2len));
+ if (match == 0) {
+ match = s1len - s2len;
+ }
}
- } else {
- /*
- * We can't do a simple memcmp in order to handle the special Tcl
- * \xC0\x80 null encoding for utf-8.
- */
-
- s1 = TclGetStringFromObj(valuePtr, &s1len);
- s2 = TclGetStringFromObj(value2Ptr, &s2len);
- match = TclpUtfNcmp2(s1, s2,
- (size_t) ((s1len < s2len) ? s1len : s2len));
}
/*
@@ -4643,19 +4619,17 @@ TclExecuteByteCode(
* TODO: consider peephole opt.
*/
- if (match == 0) {
- match = s1len - s2len;
- }
-
if (*pc != INST_STR_CMP) {
/*
* Take care of the opcodes that goto'ed into here.
*/
switch (*pc) {
+ case INST_STR_EQ:
case INST_EQ:
match = (match == 0);
break;
+ case INST_STR_NEQ:
case INST_NEQ:
match = (match != 0);
break;
diff --git a/tests/stringComp.test b/tests/stringComp.test
index 2f187be..6ef94ee 100644
--- a/tests/stringComp.test
+++ b/tests/stringComp.test
@@ -15,7 +15,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: stringComp.test,v 1.17 2009/06/24 15:17:41 dgp Exp $
+# RCS: @(#) $Id: stringComp.test,v 1.18 2010/09/25 02:25:54 hobbs Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -44,180 +44,166 @@ test stringComp-1.3 {error condition - undefined method during compile} {
foo abc 0
} a
-test stringComp-2.1 {string compare, too few args} {
- proc foo {} {string compare a}
- list [catch {foo} msg] $msg
-} {1 {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"}}
-test stringComp-2.2 {string compare, bad args} {
- proc foo {} {string compare a b c}
- list [catch {foo} msg] $msg
-} {1 {bad option "a": must be -nocase or -length}}
-test stringComp-2.3 {string compare, bad args} {
- list [catch {string compare -length -nocase str1 str2} msg] $msg
-} {1 {expected integer but got "-nocase"}}
-test stringComp-2.4 {string compare, too many args} {
- list [catch {string compare -length 10 -nocase str1 str2 str3} msg] $msg
-} {1 {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"}}
-test stringComp-2.5 {string compare with length unspecified} {
- list [catch {string compare -length 10 10} msg] $msg
-} {1 {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"}}
-test stringComp-2.6 {string compare} {
- proc foo {} {string compare abcde abdef}
- foo
-} -1
-test stringComp-2.7 {string compare, shortest method name} {
- proc foo {} {string c abcde ABCDE}
- foo
-} 1
-test stringComp-2.8 {string compare} {
- proc foo {} {string compare abcde abcde}
- foo
-} 0
-test stringComp-2.9 {string compare with length} {
- proc foo {} {string compare -length 2 abcde abxyz}
- foo
-} 0
-test stringComp-2.10 {string compare with special index} {
- proc foo {} {string compare -length end-3 abcde abxyz}
- list [catch {foo} msg] $msg
-} {1 {expected integer but got "end-3"}}
-test stringComp-2.11 {string compare, unicode} {
- proc foo {} {string compare ab\u7266 ab\u7267}
- foo
-} -1
-test stringComp-2.12 {string compare, high bit} {
- # This test will fail if the underlying comparaison
- # is using signed chars instead of unsigned chars.
- # (like SunOS's default memcmp thus the compat/memcmp.c)
- proc foo {} {string compare "\x80" "@"}
- foo
- # Nb this tests works also in utf8 space because \x80 is
- # translated into a 2 or more bytelength but whose first byte has
- # the high bit set.
-} 1
-test stringComp-2.13 {string compare -nocase} {
- proc foo {} {string compare -nocase abcde abdef}
- foo
-} -1
-test stringComp-2.14 {string compare -nocase} {
- proc foo {} {string c -nocase abcde ABCDE}
- foo
-} 0
-test stringComp-2.15 {string compare -nocase} {
- proc foo {} {string compare -nocase abcde abcde}
- foo
-} 0
-test stringComp-2.16 {string compare -nocase with length} {
- proc foo {} {string compare -length 2 -nocase abcde Abxyz}
- foo
-} 0
-test stringComp-2.17 {string compare -nocase with length} {
- proc foo {} {string compare -nocase -length 3 abcde Abxyz}
- foo
-} -1
-test stringComp-2.18 {string compare -nocase with length <= 0} {
- proc foo {} {string compare -nocase -length -1 abcde AbCdEf}
- foo
-} -1
-test stringComp-2.19 {string compare -nocase with excessive length} {
- proc foo {} {string compare -nocase -length 50 AbCdEf abcde}
- foo
-} 1
-test stringComp-2.20 {string compare -len unicode} {
- # These are strings that are 6 BYTELENGTH long, but the length
- # shouldn't make a different because there are actually 3 CHARS long
- proc foo {} {string compare -len 5 \334\334\334 \334\334\374}
- foo
-} -1
-test stringComp-2.21 {string compare -nocase with special index} {
- proc foo {} {string compare -nocase -length end-3 Abcde abxyz}
- list [catch {foo} msg] $msg
-} {1 {expected integer but got "end-3"}}
-test stringComp-2.22 {string compare, null strings} {
- proc foo {} {string compare "" ""}
- foo
-} 0
-test stringComp-2.23 {string compare, null strings} {
- proc foo {} {string compare "" foo}
- foo
-} -1
-test stringComp-2.24 {string compare, null strings} {
- proc foo {} {string compare foo ""}
- foo
-} 1
-test stringComp-2.25 {string compare -nocase, null strings} {
- proc foo {} {string compare -nocase "" ""}
- foo
-} 0
-test stringComp-2.26 {string compare -nocase, null strings} {
- proc foo {} {string compare -nocase "" foo}
- foo
-} -1
-test stringComp-2.27 {string compare -nocase, null strings} {
- proc foo {} {string compare -nocase foo ""}
- foo
-} 1
-test stringComp-2.28 {string compare with length, unequal strings} {
- proc foo {} {string compare -length 2 abc abde}
- foo
-} 0
-test stringComp-2.29 {string compare with length, unequal strings} {
- proc foo {} {string compare -length 2 ab abde}
- foo
-} 0
-test stringComp-2.30 {string compare with NUL character vs. other ASCII} {
- # Be careful here, since UTF-8 rep comparison with memcmp() of
- # these puts chars in the wrong order
- proc foo {} {string compare \x00 \x01}
- foo
-} -1
-test stringComp-2.31 {string compare, high bit} {
- proc foo {} {string compare "a\x80" "a@"}
- foo
-} 1
-test stringComp-2.32 {string compare, high bit} {
- proc foo {} {string compare "a\x00" "a\x01"}
- foo
-} -1
-test stringComp-2.33 {string compare, high bit} {
- proc foo {} {string compare "\x00\x00" "\x00\x01"}
- foo
-} -1
+## Test string compare|equal over equal constraints
+## Use result for string compare, and negate it for string equal
+## The body will be tested both in and outside a proc
+set i 0
+foreach {tname tbody tresult tcode} {
+ {too few args} {
+ string compare a
+ } {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"} {error}
+ {bad args} {
+ string compare a b c
+ } {bad option "a": must be -nocase or -length} {error}
+ {bad args} {
+ string compare -length -nocase str1 str2
+ } {expected integer but got "-nocase"} {error}
+ {too many args} {
+ string compare -length 10 -nocase str1 str2 str3
+ } {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"} {error}
+ {compare with length unspecified} {
+ string compare -length 10 10
+ } {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"} {error}
+ {basic operation fail} {
+ string compare abcde abdef
+ } {-1} {}
+ {basic operation success} {
+ string compare abcde abcde
+ } {0} {}
+ {with length} {
+ string compare -length 2 abcde abxyz
+ } {0} {}
+ {with special index} {
+ string compare -length end-3 abcde abxyz
+ } {expected integer but got "end-3"} {error}
+ {unicode} {
+ string compare ab\u7266 ab\u7267
+ } {-1} {}
+ {unicode} {string compare \334 \u00dc} 0 {}
+ {unicode} {string compare \334 \u00fc} -1 {}
+ {unicode} {string compare \334\334\334\374\374 \334\334\334\334\334} 1 {}
+ {high bit} {
+ # This test will fail if the underlying comparaison
+ # is using signed chars instead of unsigned chars.
+ # (like SunOS's default memcmp thus the compat/memcmp.c)
+ string compare "\x80" "@"
+ # Nb this tests works also in utf8 space because \x80 is
+ # translated into a 2 or more bytelength but whose first byte has
+ # the high bit set.
+ } {1} {}
+ {-nocase 1} {string compare -nocase abcde abdef} {-1} {}
+ {-nocase 2} {string compare -nocase abcde Abdef} {-1} {}
+ {-nocase 3} {string compare -nocase abcde ABCDE} {0} {}
+ {-nocase 4} {string compare -nocase abcde abcde} {0} {}
+ {-nocase unicode} {
+ string compare -nocase \334 \u00dc
+ } 0 {}
+ {-nocase unicode} {
+ string compare -nocase \334\334\334\374\u00fc \334\334\334\334\334
+ } 0 {}
+ {-nocase with length} {
+ string compare -length 2 -nocase abcde Abxyz
+ } {0} {}
+ {-nocase with length} {
+ string compare -nocase -length 3 abcde Abxyz
+ } {-1} {}
+ {-nocase with length <= 0} {
+ string compare -nocase -length -1 abcde AbCdEf
+ } {-1} {}
+ {-nocase with excessive length} {
+ string compare -nocase -length 50 AbCdEf abcde
+ } {1} {}
+ {-len unicode} {
+ # These are strings that are 6 BYTELENGTH long, but the length
+ # shouldn't make a different because there are actually 3 CHARS long
+ string compare -len 5 \334\334\334 \334\334\374
+ } -1 {}
+ {-nocase with special index} {
+ string compare -nocase -length end-3 Abcde abxyz
+ } {expected integer but got "end-3"} error
+ {null strings} {
+ string compare "" ""
+ } 0 {}
+ {null strings} {
+ string compare "" foo
+ } -1 {}
+ {null strings} {
+ string compare foo ""
+ } 1 {}
+ {-nocase null strings} {
+ string compare -nocase "" ""
+ } 0 {}
+ {-nocase null strings} {
+ string compare -nocase "" foo
+ } -1 {}
+ {-nocase null strings} {
+ string compare -nocase foo ""
+ } 1 {}
+ {with length, unequal strings} {
+ string compare -length 2 abc abde
+ } 0 {}
+ {with length, unequal strings} {
+ string compare -length 2 ab abde
+ } 0 {}
+ {with NUL character vs. other ASCII} {
+ # Be careful here, since UTF-8 rep comparison with memcmp() of
+ # these puts chars in the wrong order
+ string compare \x00 \x01
+ } -1 {}
+ {high bit} {
+ string compare "a\x80" "a@"
+ } 1 {}
+ {high bit} {
+ string compare "a\x00" "a\x01"
+ } -1 {}
+ {high bit} {
+ string compare "\x00\x00" "\x00\x01"
+ } -1 {}
+ {binary equal} {
+ string compare [binary format a100 0] [binary format a100 0]
+ } 0 {}
+ {binary neq} {
+ string compare [binary format a100a 0 1] [binary format a100a 0 0]
+ } 1 {}
+ {binary neq inequal length} {
+ string compare [binary format a20a 0 1] [binary format a100a 0 0]
+ } 1 {}
+} {
+ if {$tname eq ""} { continue }
+ if {$tcode eq ""} { set tcode ok }
+ test stringComp-2.[incr i] "string compare, $tname" \
+ -body [list eval $tbody] \
+ -returnCodes $tcode -result $tresult
+ test stringComp-2.[incr i] "string compare bc, $tname" \
+ -body "[list proc foo {} $tbody];foo" \
+ -returnCodes $tcode -result $tresult
+ if {"error" ni $tcode} {
+ set tresult [expr {!$tresult}]
+ } else {
+ set tresult [string map {compare equal} $tresult]
+ }
+ set tbody [string map {compare equal} $tbody]
+ test stringComp-2.[incr i] "string equal, $tname" \
+ -body [list eval $tbody] \
+ -returnCodes $tcode -result $tresult
+ test stringComp-2.[incr i] "string equal bc, $tname" \
+ -body "[list proc foo {} $tbody];foo" \
+ -returnCodes $tcode -result $tresult
+}
-# only need a few tests on equal, since it uses the same code as
-# string compare, but just modifies the return output
-test stringComp-3.1 {string equal} {
- proc foo {} {string equal abcde abdef}
- foo
-} 0
-test stringComp-3.2 {string equal} {
- proc foo {} {string eq abcde ABCDE}
- foo
-} 0
-test stringComp-3.3 {string equal} {
- proc foo {} {string equal abcde abcde}
- foo
-} 1
-test stringComp-3.4 {string equal -nocase} {
- proc foo {} {string equal -nocase \334\334\334\334\374\374\374\374 \334\334\334\334\334\334\334\334}
+# need a few extra tests short abbr cmd
+test stringComp-3.1 {string compare, shortest method name} {
+ proc foo {} {string c abcde ABCDE}
foo
} 1
-test stringComp-3.5 {string equal -nocase} {
- proc foo {} {string equal -nocase abcde abdef}
+test stringComp-3.2 {string equal, shortest method name} {
+ proc foo {} {string e abcde ABCDE}
foo
} 0
-test stringComp-3.6 {string equal -nocase} {
+test stringComp-3.3 {string equal -nocase} {
proc foo {} {string eq -nocase abcde ABCDE}
foo
} 1
-test stringComp-3.7 {string equal -nocase} {
- proc foo {} {string equal -nocase abcde abcde}
- foo
-} 1
-test stringComp-3.8 {string equal with length, unequal strings} {
- proc foo {} {string equal -length 2 abc abde}
- foo
-} 1
test stringComp-4.1 {string first, too few args} {
proc foo {} {string first a}