diff options
author | hobbs <hobbs> | 2010-09-25 02:25:54 (GMT) |
---|---|---|
committer | hobbs <hobbs> | 2010-09-25 02:25:54 (GMT) |
commit | 2dac256b72f43d6d47ba902d97c17f0a7445a12e (patch) | |
tree | 2cba95f3b1f1df48d659ee9f4df5343af212c3ea /generic | |
parent | 87af30e0e77b0228943003952af1742423ed0b6d (diff) | |
download | tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.zip tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.tar.gz tcl-2dac256b72f43d6d47ba902d97c17f0a7445a12e.tar.bz2 |
* tests/stringComp.test: improved string test coverage.
* generic/tclExecute.c (TclExecuteByteCode): merged the INST_STR_CMP
and INST_STR_EQ/INST_STR_NEQ paths. This speeds up eq/ne/[string eq]
by using obj-aware comparisons, and speeds up eq/==/ne/!= by adding a
length equality check.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclExecute.c | 168 |
1 file changed, 71 insertions, 97 deletions
diff --git a/generic/tclExecute.c b/generic/tclExecute.c index a6fae67..fa792df 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -14,7 +14,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclExecute.c,v 1.497 2010/09/22 18:37:27 msofer Exp $ + * RCS: @(#) $Id: tclExecute.c,v 1.498 2010/09/25 02:25:54 hobbs Exp $ */ #include "tclInt.h" @@ -4528,114 +4528,90 @@ TclExecuteByteCode( case INST_STR_EQ: case INST_STR_NEQ: /* String (in)equality check */ - /* - * TODO: Consider merging into INST_STR_CMP - */ - + case INST_STR_CMP: /* String compare. */ + stringCompare: value2Ptr = OBJ_AT_TOS; valuePtr = OBJ_UNDER_TOS; if (valuePtr == value2Ptr) { + match = 0; + } else { /* - * On the off-chance that the objects are the same, we don't - * really have to think hard about equality. + * We only need to check (in)equality when we have equal length + * strings. We can use memcmp in all (n)eq cases because we + * don't need to worry about lexical LE/BE variance. */ - - match = (*pc == INST_STR_EQ); - } else { - s1 = TclGetStringFromObj(valuePtr, &s1len); - s2 = TclGetStringFromObj(value2Ptr, &s2len); - if (s1len == s2len) { + typedef int (*memCmpFn_t)(const void*, const void*, size_t); + memCmpFn_t memCmpFn; + int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ) + || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ)); + + if (TclIsPureByteArray(valuePtr) + && TclIsPureByteArray(value2Ptr)) { + s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len); + s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); + memCmpFn = memcmp; + } else if (((valuePtr->typePtr == &tclStringType) + && (value2Ptr->typePtr == &tclStringType))) { /* - * We only need to check (in)equality when we have equal - * length strings. + * Do a unicode-specific comparison if both of the args are of + * String type. If the char length == byte length, we can do a + * memcmp. 
In benchmark testing this proved the most efficient + * check between the unicode and string comparison operations. */ - if (*pc == INST_STR_NEQ) { - match = (memcmp(s1, s2, s1len) != 0); + s1len = Tcl_GetCharLength(valuePtr); + s2len = Tcl_GetCharLength(value2Ptr); + if ((s1len == valuePtr->length) + && (s2len == value2Ptr->length)) { + s1 = valuePtr->bytes; + s2 = value2Ptr->bytes; + memCmpFn = memcmp; } else { - /* INST_STR_EQ */ - match = (memcmp(s1, s2, s1len) == 0); + s1 = (char *) Tcl_GetUnicode(valuePtr); + s2 = (char *) Tcl_GetUnicode(value2Ptr); + if ( +#ifdef WORDS_BIGENDIAN + 1 +#else + checkEq +#endif + ) { + memCmpFn = memcmp; + s1len *= sizeof(Tcl_UniChar); + s2len *= sizeof(Tcl_UniChar); + } else { + memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; + } } } else { - match = (*pc == INST_STR_NEQ); - } - } - - TRACE(("%.20s %.20s => %d\n", O2S(valuePtr),O2S(value2Ptr),match)); - - /* - * Peep-hole optimisation: if you're about to jump, do jump from here. - */ - - pc++; -#ifndef TCL_COMPILE_DEBUG - switch (*pc) { - case INST_JUMP_FALSE1: - NEXT_INST_F((match? 2 : TclGetInt1AtPtr(pc+1)), 2, 0); - case INST_JUMP_TRUE1: - NEXT_INST_F((match? TclGetInt1AtPtr(pc+1) : 2), 2, 0); - case INST_JUMP_FALSE4: - NEXT_INST_F((match? 5 : TclGetInt4AtPtr(pc+1)), 2, 0); - case INST_JUMP_TRUE4: - NEXT_INST_F((match? TclGetInt4AtPtr(pc+1) : 5), 2, 0); - } -#endif - objResultPtr = TCONST(match); - NEXT_INST_F(0, 2, 1); - - stringCompare: - case INST_STR_CMP: /* String compare. */ - value2Ptr = OBJ_AT_TOS; - valuePtr = OBJ_UNDER_TOS; - - /* - * The comparison function should compare up to the minimum byte - * length only. - */ - - if (valuePtr == value2Ptr) { - /* - * In the pure equality case, set lengths too for the checks below - * (or we could goto beyond it). - */ + /* + * strcmp can't do a simple memcmp in order to handle the + * special Tcl \xC0\x80 null encoding for utf-8. 
+ */ - match = s1len = s2len = 0; - } else if (TclIsPureByteArray(valuePtr) - && TclIsPureByteArray(value2Ptr)) { - s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len); - s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); - match = memcmp(s1, s2, - (size_t) ((s1len < s2len) ? s1len : s2len)); - } else if (((valuePtr->typePtr == &tclStringType) - && (value2Ptr->typePtr == &tclStringType))) { - /* - * Do a unicode-specific comparison if both of the args are of - * String type. If the char length == byte length, we can do a - * memcmp. In benchmark testing this proved the most efficient - * check between the unicode and string comparison operations. - */ + s1 = TclGetStringFromObj(valuePtr, &s1len); + s2 = TclGetStringFromObj(value2Ptr, &s2len); + if (checkEq) { + memCmpFn = memcmp; + } else { + memCmpFn = (memCmpFn_t) TclpUtfNcmp2; + } + } - s1len = Tcl_GetCharLength(valuePtr); - s2len = Tcl_GetCharLength(value2Ptr); - if ((s1len == valuePtr->length) && (s2len == value2Ptr->length)) { - match = memcmp(valuePtr->bytes, value2Ptr->bytes, - (unsigned) ((s1len < s2len) ? s1len : s2len)); + if (checkEq && (s1len != s2len)) { + match = 1; } else { - match = TclUniCharNcmp(Tcl_GetUnicode(valuePtr), - Tcl_GetUnicode(value2Ptr), - (unsigned) ((s1len < s2len) ? s1len : s2len)); + /* + * The comparison function should compare up to the minimum + * byte length only. + */ + match = memCmpFn(s1, s2, + (size_t) ((s1len < s2len) ? s1len : s2len)); + if (match == 0) { + match = s1len - s2len; + } } - } else { - /* - * We can't do a simple memcmp in order to handle the special Tcl - * \xC0\x80 null encoding for utf-8. - */ - - s1 = TclGetStringFromObj(valuePtr, &s1len); - s2 = TclGetStringFromObj(value2Ptr, &s2len); - match = TclpUtfNcmp2(s1, s2, - (size_t) ((s1len < s2len) ? s1len : s2len)); } /* @@ -4643,19 +4619,17 @@ TclExecuteByteCode( * TODO: consider peephole opt. 
*/ - if (match == 0) { - match = s1len - s2len; - } - if (*pc != INST_STR_CMP) { /* * Take care of the opcodes that goto'ed into here. */ switch (*pc) { + case INST_STR_EQ: case INST_EQ: match = (match == 0); break; + case INST_STR_NEQ: case INST_NEQ: match = (match != 0); break; |