From 2dac256b72f43d6d47ba902d97c17f0a7445a12e Mon Sep 17 00:00:00 2001 From: hobbs Date: Sat, 25 Sep 2010 02:25:54 +0000 Subject: * tests/stringComp.test: improved string test coverage * generic/tclExecute.c (TclExecuteByteCode): merge INST_STR_CMP and INST_STR_EQ/INST_STR_NEQ paths. Speeds up eq/ne/[string eq] with obj-aware comparisons and eq/==/ne/!= with length equality check. --- ChangeLog | 8 ++ generic/tclExecute.c | 168 +++++++++++--------------- tests/stringComp.test | 320 ++++++++++++++++++++++++-------------------------- 3 files changed, 232 insertions(+), 264 deletions(-) diff --git a/ChangeLog b/ChangeLog index 20c82ff..7163a85 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2010-09-24 Jeff Hobbs + + * tests/stringComp.test: improved string eq/cmp test coverage + * generic/tclExecute.c (TclExecuteByteCode): merge INST_STR_CMP + and INST_STR_EQ/INST_STR_NEQ paths. Speeds up eq/ne/[string eq] + with obj-aware comparisons and eq/==/ne/!= with length equality + check. + 2010-09-24 Andreas Kupries * tclWinsock.c: [Bug 3056775]: Fixed race condition between thread diff --git a/generic/tclExecute.c b/generic/tclExecute.c index a6fae67..fa792df 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -14,7 +14,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclExecute.c,v 1.497 2010/09/22 18:37:27 msofer Exp $ + * RCS: @(#) $Id: tclExecute.c,v 1.498 2010/09/25 02:25:54 hobbs Exp $ */ #include "tclInt.h" @@ -4528,114 +4528,90 @@ TclExecuteByteCode( case INST_STR_EQ: case INST_STR_NEQ: /* String (in)equality check */ - /* - * TODO: Consider merging into INST_STR_CMP - */ - + case INST_STR_CMP: /* String compare. */ + stringCompare: value2Ptr = OBJ_AT_TOS; valuePtr = OBJ_UNDER_TOS; if (valuePtr == value2Ptr) { + match = 0; + } else { /* - * On the off-chance that the objects are the same, we don't - * really have to think hard about equality. + * We only need to check (in)equality when we have equal length + * strings. We can use memcmp in all (n)eq cases because we + * don't need to worry about lexical LE/BE variance. */ - - match = (*pc == INST_STR_EQ); - } else { - s1 = TclGetStringFromObj(valuePtr, &s1len); - s2 = TclGetStringFromObj(value2Ptr, &s2len); - if (s1len == s2len) { + typedef int (*memCmpFn_t)(const void*, const void*, size_t); + memCmpFn_t memCmpFn; + int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ) + || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ)); + + if (TclIsPureByteArray(valuePtr) + && TclIsPureByteArray(value2Ptr)) { + s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len); + s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); + memCmpFn = memcmp; + } else if (((valuePtr->typePtr == &tclStringType) + && (value2Ptr->typePtr == &tclStringType))) { /* - * We only need to check (in)equality when we have equal - * length strings. + * Do a unicode-specific comparison if both of the args are of + * String type. If the char length == byte length, we can do a + * memcmp. In benchmark testing this proved the most efficient + * check between the unicode and string comparison operations. */ - if (*pc == INST_STR_NEQ) { - match = (memcmp(s1, s2, s1len) != 0); + s1len = Tcl_GetCharLength(valuePtr); + s2len = Tcl_GetCharLength(value2Ptr); + if ((s1len == valuePtr->length) + && (s2len == value2Ptr->length)) { + s1 = valuePtr->bytes; + s2 = value2Ptr->bytes; + memCmpFn = memcmp; } else { - /* INST_STR_EQ */ - match = (memcmp(s1, s2, s1len) == 0); + s1 = (char *) Tcl_GetUnicode(valuePtr); + s2 = (char *) Tcl_GetUnicode(value2Ptr); + if ( +#ifdef WORDS_BIGENDIAN + 1 +#else + checkEq +#endif + ) { + memCmpFn = memcmp; + s1len *= sizeof(Tcl_UniChar); + s2len *= sizeof(Tcl_UniChar); + } else { + memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; + } } } else { - match = (*pc == INST_STR_NEQ); - } - } - - TRACE(("%.20s %.20s => %d\n", O2S(valuePtr),O2S(value2Ptr),match)); - - /* - * Peep-hole optimisation: if you're about to jump, do jump from here. - */ - - pc++; -#ifndef TCL_COMPILE_DEBUG - switch (*pc) { - case INST_JUMP_FALSE1: - NEXT_INST_F((match? 2 : TclGetInt1AtPtr(pc+1)), 2, 0); - case INST_JUMP_TRUE1: - NEXT_INST_F((match? TclGetInt1AtPtr(pc+1) : 2), 2, 0); - case INST_JUMP_FALSE4: - NEXT_INST_F((match? 5 : TclGetInt4AtPtr(pc+1)), 2, 0); - case INST_JUMP_TRUE4: - NEXT_INST_F((match? TclGetInt4AtPtr(pc+1) : 5), 2, 0); - } -#endif - objResultPtr = TCONST(match); - NEXT_INST_F(0, 2, 1); - - stringCompare: - case INST_STR_CMP: /* String compare. */ - value2Ptr = OBJ_AT_TOS; - valuePtr = OBJ_UNDER_TOS; - - /* - * The comparison function should compare up to the minimum byte - * length only. - */ - - if (valuePtr == value2Ptr) { - /* - * In the pure equality case, set lengths too for the checks below - * (or we could goto beyond it). - */ + /* + * strcmp can't do a simple memcmp in order to handle the + * special Tcl \xC0\x80 null encoding for utf-8. + */ - match = s1len = s2len = 0; - } else if (TclIsPureByteArray(valuePtr) - && TclIsPureByteArray(value2Ptr)) { - s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len); - s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); - match = memcmp(s1, s2, - (size_t) ((s1len < s2len) ? s1len : s2len)); - } else if (((valuePtr->typePtr == &tclStringType) - && (value2Ptr->typePtr == &tclStringType))) { - /* - * Do a unicode-specific comparison if both of the args are of - * String type. If the char length == byte length, we can do a - * memcmp. In benchmark testing this proved the most efficient - * check between the unicode and string comparison operations. - */ + s1 = TclGetStringFromObj(valuePtr, &s1len); + s2 = TclGetStringFromObj(value2Ptr, &s2len); + if (checkEq) { + memCmpFn = memcmp; + } else { + memCmpFn = (memCmpFn_t) TclpUtfNcmp2; + } + } - s1len = Tcl_GetCharLength(valuePtr); - s2len = Tcl_GetCharLength(value2Ptr); - if ((s1len == valuePtr->length) && (s2len == value2Ptr->length)) { - match = memcmp(valuePtr->bytes, value2Ptr->bytes, - (unsigned) ((s1len < s2len) ? s1len : s2len)); + if (checkEq && (s1len != s2len)) { + match = 1; } else { - match = TclUniCharNcmp(Tcl_GetUnicode(valuePtr), - Tcl_GetUnicode(value2Ptr), - (unsigned) ((s1len < s2len) ? s1len : s2len)); + /* + * The comparison function should compare up to the minimum + * byte length only. + */ + match = memCmpFn(s1, s2, + (size_t) ((s1len < s2len) ? s1len : s2len)); + if (match == 0) { + match = s1len - s2len; + } } - } else { - /* - * We can't do a simple memcmp in order to handle the special Tcl - * \xC0\x80 null encoding for utf-8. - */ - - s1 = TclGetStringFromObj(valuePtr, &s1len); - s2 = TclGetStringFromObj(value2Ptr, &s2len); - match = TclpUtfNcmp2(s1, s2, - (size_t) ((s1len < s2len) ? s1len : s2len)); } /* @@ -4643,19 +4619,17 @@ TclExecuteByteCode( * TODO: consider peephole opt. */ - if (match == 0) { - match = s1len - s2len; - } - if (*pc != INST_STR_CMP) { /* * Take care of the opcodes that goto'ed into here. */ switch (*pc) { + case INST_STR_EQ: case INST_EQ: match = (match == 0); break; + case INST_STR_NEQ: case INST_NEQ: match = (match != 0); break; diff --git a/tests/stringComp.test b/tests/stringComp.test index 2f187be..6ef94ee 100644 --- a/tests/stringComp.test +++ b/tests/stringComp.test @@ -15,7 +15,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: stringComp.test,v 1.17 2009/06/24 15:17:41 dgp Exp $ +# RCS: @(#) $Id: stringComp.test,v 1.18 2010/09/25 02:25:54 hobbs Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -44,180 +44,166 @@ test stringComp-1.3 {error condition - undefined method during compile} { foo abc 0 } a -test stringComp-2.1 {string compare, too few args} { - proc foo {} {string compare a} - list [catch {foo} msg] $msg -} {1 {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"}} -test stringComp-2.2 {string compare, bad args} { - proc foo {} {string compare a b c} - list [catch {foo} msg] $msg -} {1 {bad option "a": must be -nocase or -length}} -test stringComp-2.3 {string compare, bad args} { - list [catch {string compare -length -nocase str1 str2} msg] $msg -} {1 {expected integer but got "-nocase"}} -test stringComp-2.4 {string compare, too many args} { - list [catch {string compare -length 10 -nocase str1 str2 str3} msg] $msg -} {1 {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"}} -test stringComp-2.5 {string compare with length unspecified} { - list [catch {string compare -length 10 10} msg] $msg -} {1 {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"}} -test stringComp-2.6 {string compare} { - proc foo {} {string compare abcde abdef} - foo -} -1 -test stringComp-2.7 {string compare, shortest method name} { - proc foo {} {string c abcde ABCDE} - foo -} 1 -test stringComp-2.8 {string compare} { - proc foo {} {string compare abcde abcde} - foo -} 0 -test stringComp-2.9 {string compare with length} { - proc foo {} {string compare -length 2 abcde abxyz} - foo -} 0 -test stringComp-2.10 {string compare with special index} { - proc foo {} {string compare -length end-3 abcde abxyz} - list [catch {foo} msg] $msg -} {1 {expected integer but got "end-3"}} -test stringComp-2.11 {string compare, unicode} { - proc foo {} {string compare ab\u7266 ab\u7267} - foo -} -1 -test stringComp-2.12 {string compare, high bit} { - # This test will fail if the underlying comparaison - # is using signed chars instead of unsigned chars. - # (like SunOS's default memcmp thus the compat/memcmp.c) - proc foo {} {string compare "\x80" "@"} - foo - # Nb this tests works also in utf8 space because \x80 is - # translated into a 2 or more bytelength but whose first byte has - # the high bit set. -} 1 -test stringComp-2.13 {string compare -nocase} { - proc foo {} {string compare -nocase abcde abdef} - foo -} -1 -test stringComp-2.14 {string compare -nocase} { - proc foo {} {string c -nocase abcde ABCDE} - foo -} 0 -test stringComp-2.15 {string compare -nocase} { - proc foo {} {string compare -nocase abcde abcde} - foo -} 0 -test stringComp-2.16 {string compare -nocase with length} { - proc foo {} {string compare -length 2 -nocase abcde Abxyz} - foo -} 0 -test stringComp-2.17 {string compare -nocase with length} { - proc foo {} {string compare -nocase -length 3 abcde Abxyz} - foo -} -1 -test stringComp-2.18 {string compare -nocase with length <= 0} { - proc foo {} {string compare -nocase -length -1 abcde AbCdEf} - foo -} -1 -test stringComp-2.19 {string compare -nocase with excessive length} { - proc foo {} {string compare -nocase -length 50 AbCdEf abcde} - foo -} 1 -test stringComp-2.20 {string compare -len unicode} { - # These are strings that are 6 BYTELENGTH long, but the length - # shouldn't make a different because there are actually 3 CHARS long - proc foo {} {string compare -len 5 \334\334\334 \334\334\374} - foo -} -1 -test stringComp-2.21 {string compare -nocase with special index} { - proc foo {} {string compare -nocase -length end-3 Abcde abxyz} - list [catch {foo} msg] $msg -} {1 {expected integer but got "end-3"}} -test stringComp-2.22 {string compare, null strings} { - proc foo {} {string compare "" ""} - foo -} 0 -test stringComp-2.23 {string compare, null strings} { - proc foo {} {string compare "" foo} - foo -} -1 -test stringComp-2.24 {string compare, null strings} { - proc foo {} {string compare foo ""} - foo -} 1 -test stringComp-2.25 {string compare -nocase, null strings} { - proc foo {} {string compare -nocase "" ""} - foo -} 0 -test stringComp-2.26 {string compare -nocase, null strings} { - proc foo {} {string compare -nocase "" foo} - foo -} -1 -test stringComp-2.27 {string compare -nocase, null strings} { - proc foo {} {string compare -nocase foo ""} - foo -} 1 -test stringComp-2.28 {string compare with length, unequal strings} { - proc foo {} {string compare -length 2 abc abde} - foo -} 0 -test stringComp-2.29 {string compare with length, unequal strings} { - proc foo {} {string compare -length 2 ab abde} - foo -} 0 -test stringComp-2.30 {string compare with NUL character vs. other ASCII} { - # Be careful here, since UTF-8 rep comparison with memcmp() of - # these puts chars in the wrong order - proc foo {} {string compare \x00 \x01} - foo -} -1 -test stringComp-2.31 {string compare, high bit} { - proc foo {} {string compare "a\x80" "a@"} - foo -} 1 -test stringComp-2.32 {string compare, high bit} { - proc foo {} {string compare "a\x00" "a\x01"} - foo -} -1 -test stringComp-2.33 {string compare, high bit} { - proc foo {} {string compare "\x00\x00" "\x00\x01"} - foo -} -1 +## Test string compare|equal over equal constraints +## Use result for string compare, and negate it for string equal +## The body will be tested both in and outside a proc +set i 0 +foreach {tname tbody tresult tcode} { + {too few args} { + string compare a + } {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"} {error} + {bad args} { + string compare a b c + } {bad option "a": must be -nocase or -length} {error} + {bad args} { + string compare -length -nocase str1 str2 + } {expected integer but got "-nocase"} {error} + {too many args} { + string compare -length 10 -nocase str1 str2 str3 + } {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"} {error} + {compare with length unspecified} { + string compare -length 10 10 + } {wrong # args: should be "string compare ?-nocase? ?-length int? string1 string2"} {error} + {basic operation fail} { + string compare abcde abdef + } {-1} {} + {basic operation success} { + string compare abcde abcde + } {0} {} + {with length} { + string compare -length 2 abcde abxyz + } {0} {} + {with special index} { + string compare -length end-3 abcde abxyz + } {expected integer but got "end-3"} {error} + {unicode} { + string compare ab\u7266 ab\u7267 + } {-1} {} + {unicode} {string compare \334 \u00dc} 0 {} + {unicode} {string compare \334 \u00fc} -1 {} + {unicode} {string compare \334\334\334\374\374 \334\334\334\334\334} 1 {} + {high bit} { + # This test will fail if the underlying comparaison + # is using signed chars instead of unsigned chars. + # (like SunOS's default memcmp thus the compat/memcmp.c) + string compare "\x80" "@" + # Nb this tests works also in utf8 space because \x80 is + # translated into a 2 or more bytelength but whose first byte has + # the high bit set. + } {1} {} + {-nocase 1} {string compare -nocase abcde abdef} {-1} {} + {-nocase 2} {string compare -nocase abcde Abdef} {-1} {} + {-nocase 3} {string compare -nocase abcde ABCDE} {0} {} + {-nocase 4} {string compare -nocase abcde abcde} {0} {} + {-nocase unicode} { + string compare -nocase \334 \u00dc + } 0 {} + {-nocase unicode} { + string compare -nocase \334\334\334\374\u00fc \334\334\334\334\334 + } 0 {} + {-nocase with length} { + string compare -length 2 -nocase abcde Abxyz + } {0} {} + {-nocase with length} { + string compare -nocase -length 3 abcde Abxyz + } {-1} {} + {-nocase with length <= 0} { + string compare -nocase -length -1 abcde AbCdEf + } {-1} {} + {-nocase with excessive length} { + string compare -nocase -length 50 AbCdEf abcde + } {1} {} + {-len unicode} { + # These are strings that are 6 BYTELENGTH long, but the length + # shouldn't make a different because there are actually 3 CHARS long + string compare -len 5 \334\334\334 \334\334\374 + } -1 {} + {-nocase with special index} { + string compare -nocase -length end-3 Abcde abxyz + } {expected integer but got "end-3"} error + {null strings} { + string compare "" "" + } 0 {} + {null strings} { + string compare "" foo + } -1 {} + {null strings} { + string compare foo "" + } 1 {} + {-nocase null strings} { + string compare -nocase "" "" + } 0 {} + {-nocase null strings} { + string compare -nocase "" foo + } -1 {} + {-nocase null strings} { + string compare -nocase foo "" + } 1 {} + {with length, unequal strings} { + string compare -length 2 abc abde + } 0 {} + {with length, unequal strings} { + string compare -length 2 ab abde + } 0 {} + {with NUL character vs. other ASCII} { + # Be careful here, since UTF-8 rep comparison with memcmp() of + # these puts chars in the wrong order + string compare \x00 \x01 + } -1 {} + {high bit} { + string compare "a\x80" "a@" + } 1 {} + {high bit} { + string compare "a\x00" "a\x01" + } -1 {} + {high bit} { + string compare "\x00\x00" "\x00\x01" + } -1 {} + {binary equal} { + string compare [binary format a100 0] [binary format a100 0] + } 0 {} + {binary neq} { + string compare [binary format a100a 0 1] [binary format a100a 0 0] + } 1 {} + {binary neq inequal length} { + string compare [binary format a20a 0 1] [binary format a100a 0 0] + } 1 {} +} { + if {$tname eq ""} { continue } + if {$tcode eq ""} { set tcode ok } + test stringComp-2.[incr i] "string compare, $tname" \ + -body [list eval $tbody] \ + -returnCodes $tcode -result $tresult + test stringComp-2.[incr i] "string compare bc, $tname" \ + -body "[list proc foo {} $tbody];foo" \ + -returnCodes $tcode -result $tresult + if {"error" ni $tcode} { + set tresult [expr {!$tresult}] + } else { + set tresult [string map {compare equal} $tresult] + } + set tbody [string map {compare equal} $tbody] + test stringComp-2.[incr i] "string equal, $tname" \ + -body [list eval $tbody] \ + -returnCodes $tcode -result $tresult + test stringComp-2.[incr i] "string equal bc, $tname" \ + -body "[list proc foo {} $tbody];foo" \ + -returnCodes $tcode -result $tresult +} -# only need a few tests on equal, since it uses the same code as -# string compare, but just modifies the return output -test stringComp-3.1 {string equal} { - proc foo {} {string equal abcde abdef} - foo -} 0 -test stringComp-3.2 {string equal} { - proc foo {} {string eq abcde ABCDE} - foo -} 0 -test stringComp-3.3 {string equal} { - proc foo {} {string equal abcde abcde} - foo -} 1 -test stringComp-3.4 {string equal -nocase} { - proc foo {} {string equal -nocase \334\334\334\334\374\374\374\374 \334\334\334\334\334\334\334\334} +# need a few extra tests short abbr cmd +test stringComp-3.1 {string compare, shortest method name} { + proc foo {} {string c abcde ABCDE} foo } 1 -test stringComp-3.5 {string equal -nocase} { - proc foo {} {string equal -nocase abcde abdef} +test stringComp-3.2 {string equal, shortest method name} { + proc foo {} {string e abcde ABCDE} foo } 0 -test stringComp-3.6 {string equal -nocase} { +test stringComp-3.3 {string equal -nocase} { proc foo {} {string eq -nocase abcde ABCDE} foo } 1 -test stringComp-3.7 {string equal -nocase} { - proc foo {} {string equal -nocase abcde abcde} - foo -} 1 -test stringComp-3.8 {string equal with length, unequal strings} { - proc foo {} {string equal -length 2 abc abde} - foo -} 1 test stringComp-4.1 {string first, too few args} { proc foo {} {string first a} -- cgit v0.12