* generic/tclInt.decls:

* generic/tclIntDecls.h: * generic/tclStubInit.c: * generic/tclUtf.c: added TclpUtfNcmp2 private command that mirrors Tcl_UtfNcmp, but takes n in bytes, not utf-8 chars. This provides a faster alternative for comparing utf strings internally. (Tcl_UniCharNcmp, Tcl_UniCharNcasecmp): removed the explicit end of string check as it wasn't correct for the function (by doc and logic). * generic/tclCmdMZ.c (Tcl_StringObjCmd): reworked the string equal comparison code to use TclpUtfNcmp2 as well as short-circuit for equal objects or unequal length strings in the equal case. Removed the use of goto and streamlined the other parts. * generic/tclExecute.c (TclExecuteByteCode): added check for object equality in the comparison instructions. Added short-circuit for != length strings in INST_EQ, INST_NEQ and INST_STR_CMP. Reworked INST_STR_CMP to use TclpUtfNcmp2 where appropriate, and only use Tcl_UniCharNcmp when at least one of the objects is a Unicode obj with no utf bytes.
author: hobbs <hobbs> 2002-05-29 09:09:57 (GMT)
committer: hobbs <hobbs> 2002-05-29 09:09:57 (GMT)
commit: 3a1941e7d007e93449aa22085687a8fafaddad7e (patch)
tree: 9794bcbc0e094b9645add8339f0278666543ef7e /generic/tclCmdMZ.c
parent: 514dd2a61babad4abc78895bdadec335dd9b4c71 (diff)
download: tcl-3a1941e7d007e93449aa22085687a8fafaddad7e.zip
tcl-3a1941e7d007e93449aa22085687a8fafaddad7e.tar.gz
tcl-3a1941e7d007e93449aa22085687a8fafaddad7e.tar.bz2
1 files changed, 62 insertions, 106 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 0f39ecb..a3b03df 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -14,7 +14,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.67 2002/04/18 13:49:30 dkf Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.68 2002/05/29 09:09:57 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -1094,6 +1094,11 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv)
  *	that this command only functions correctly on properly formed
  *	Tcl UTF strings.
  *
+ *	Note that the primary methods here (equal, compare, match, ...)
+ *	have bytecode equivalents.  You will find the code for those in
+ *	tclExecute.c.  The code here will only be used in the non-bc
+ *	case (like in an 'eval').
+ *
  * Results:
  *	A standard Tcl result.
  *
@@ -1153,6 +1158,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	     * comparison in INST_EQ/INST_NEQ/INST_LT/...).
 	     */
 	    int i, match, length, nocase = 0, reqlength = -1;
+	    int (*strCmpFn)();
 
 	    if (objc < 4 || objc > 7) {
 	    str_cmp_args:
@@ -1183,134 +1189,84 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		}
 	    }
 
-	    if (reqlength == 0) {
-		/*
-		 * Anything matches at 0 chars, right?
-		 */
-
-		match = 0;
-		goto stringComparisonDone;
-	    }
-
 	    /*
 	     * From now on, we only access the two objects at the end
 	     * of the argument array.
 	     */
 	    objv += objc-2;
 
-	    /*
-	     * Use binary versions of comparisons since that won't
-	     * cause undue type conversions and it is much faster.
-	     * Only do this if we're case-sensitive (which is all
-	     * that really makes sense with byte arrays anyway, and
-	     * we have no memcasecmp() for some reason... :^)
-	     */
-	    if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
-		    objv[1]->typePtr == &tclByteArrayType) {
-		unsigned char *bytes1, *bytes2;
-
-		bytes1 = Tcl_GetByteArrayFromObj(objv[0], &length1);
-		bytes2 = Tcl_GetByteArrayFromObj(objv[1], &length2);
-		length = (length1 < length2) ? length1 : length2;
-
-		if ((reqlength > 0) && (reqlength < length)) {
-		    length = reqlength;
-		} else if (reqlength < 0) {
-		    /*
-		     * The requested length is negative, so we ignore it by
-		     * setting it to the longer of the two lengths.
-		     */
-
-		    reqlength = (length1 > length2) ? length1 : length2;
-		}
+	    if ((reqlength == 0) || (objv[0] == objv[1])) {
+		/*
+		 * Alway match at 0 chars of if it is the same obj.
+		 */
 
-		match = memcmp(bytes1, bytes2, (unsigned)length);
-		if ((match == 0) && (reqlength > length)) {
-		    match = length1 - length2;
+		Tcl_SetBooleanObj(resultPtr,
+			((enum options) index == STR_EQUAL));
+		break;
+	    } else if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
+		    objv[1]->typePtr == &tclByteArrayType) {
+		/*
+		 * Use binary versions of comparisons since that won't
+		 * cause undue type conversions and it is much faster.
+		 * Only do this if we're case-sensitive (which is all
+		 * that really makes sense with byte arrays anyway, and
+		 * we have no memcasecmp() for some reason... :^)
+		 */
+		string1 = (char*) Tcl_GetByteArrayFromObj(objv[0], &length1);
+		string2 = (char*) Tcl_GetByteArrayFromObj(objv[1], &length2);
+		strCmpFn = memcmp;
+	    } else if (((objv[0]->typePtr == &tclStringType)
+		    && (objv[0]->bytes == NULL))
+		    || ((objv[1]->typePtr == &tclStringType)
+			    && (objv[1]->bytes == NULL))) {
+		/*
+		 * Use UNICODE versions of string comparisons since that
+		 * won't cause undue type conversions and we can work with
+		 * characters all of a fixed size (much faster.), but only
+		 * when one of the objects is a pure UNICODE object.
+		 */
+		string1 = (char*) Tcl_GetUnicodeFromObj(objv[0], &length1);
+		string2 = (char*) Tcl_GetUnicodeFromObj(objv[1], &length2);
+		strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;
+	    } else {
+		/*
+		 * As a catch-all we will work with UTF-8.  We cannot use
+		 * memcmp() as that is unsafe with any string containing
+		 * NULL (\xC0\x80 in Tcl's utf rep).  We can use the more
+		 * efficient TclpUtfNcmp2 if we are case-sensitive and no
+		 * specific length was requested.
+		 */
+		string1 = (char*) Tcl_GetStringFromObj(objv[0], &length1);
+		string2 = (char*) Tcl_GetStringFromObj(objv[1], &length2);
+		if ((reqlength < 0) && !nocase) {
+		    strCmpFn = TclpUtfNcmp2;
+		} else {
+		    length1 = Tcl_NumUtfChars(string1, length1);
+		    length2 = Tcl_NumUtfChars(string2, length2);
+		    strCmpFn = nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp;
 		}
-		goto stringComparisonDone;
 	    }
 
-	    /*
-	     * Use UNICODE versions of string comparisons since that
-	     * won't cause undue type conversions and we can work with
-	     * characters all of a fixed size (much faster.)  Also use
-	     * this code for untyped objects, since like that we'll
-	     * pick up many things that are used for comparison in
-	     * scripts and convert them (efficiently) to UNICODE
-	     * strings for comparison, but exclude case where both are
-	     * untyped as that is a little bit aggressive.
-	     */
-	    if ((objv[0]->typePtr == &tclStringType ||
-		    objv[0]->typePtr == NULL) &&
-		    (objv[1]->typePtr == &tclStringType ||
-			    objv[1]->typePtr == NULL) &&
-		    !(objv[0]->typePtr == NULL && objv[1]->typePtr == NULL)) {
-		Tcl_UniChar *uni1, *uni2;
-
-		uni1 = Tcl_GetUnicodeFromObj(objv[0], &length1);
-		uni2 = Tcl_GetUnicodeFromObj(objv[1], &length2);
+	    if (((enum options) index == STR_EQUAL)
+		    && (reqlength < 0) && (length1 != length2)) {
+		match = 1; /* this will be reversed below */
+	    } else {
 		length = (length1 < length2) ? length1 : length2;
-
 		if (reqlength > 0 && reqlength < length) {
 		    length = reqlength;
 		} else if (reqlength < 0) {
 		    /*
 		     * The requested length is negative, so we ignore it by
-		     * setting it to the longer of the two lengths.
+		     * setting it to length + 1 so we correct the match var.
 		     */
-
-		    reqlength = (length1 < length2) ? length2 : length1;
-		}
-
-		if (nocase) {
-		    match = Tcl_UniCharNcasecmp(uni1, uni2, (unsigned)length);
-		} else {
-		    match = Tcl_UniCharNcmp(uni1, uni2, (unsigned)length);
+		    reqlength = length + 1;
 		}
-
+		match = strCmpFn(string1, string2, (unsigned) length);
 		if ((match == 0) && (reqlength > length)) {
 		    match = length1 - length2;
 		}
-		goto stringComparisonDone;
-	    }
-
-	    /*
-	     * Strings to be compared are not both UNICODE or byte
-	     * arrays, so we will need to convert to UTF-8 and work
-	     * there (cannot use memcmp() as that is an unsafe
-	     * operation with any string containing \u0000 and the
-	     * safety test is equivalent in cost to the comparison
-	     * itself!)
-	     */
-	    string1 = Tcl_GetStringFromObj(objv[0], &length1);
-	    string2 = Tcl_GetStringFromObj(objv[1], &length2);
-	    length1 = Tcl_NumUtfChars(string1, length1);
-	    length2 = Tcl_NumUtfChars(string2, length2);
-	    length = (length1 < length2) ? length1 : length2;
-
-	    if ((reqlength > 0) && (reqlength < length)) {
-		length = reqlength;
-	    } else if (reqlength < 0) {
-		/*
-		 * The requested length is negative, so we ignore it by
-		 * setting it to the longer of the two lengths.
-		 */
-
-		reqlength = (length1 > length2) ? length1 : length2;
-	    }
-
-	    if (nocase) {
-		match = Tcl_UtfNcasecmp(string1, string2, (unsigned) length);
-	    } else {
-		match = Tcl_UtfNcmp(string1, string2, (unsigned) length);
-	    }
-
-	    if ((match == 0) && (reqlength > length)) {
-		match = length1 - length2;
 	    }
 
-	stringComparisonDone:
 	    if ((enum options) index == STR_EQUAL) {
 		Tcl_SetBooleanObj(resultPtr, (match) ? 0 : 1);
 	    } else {
author	hobbs <hobbs>	2002-05-29 09:09:57 (GMT)
committer	hobbs <hobbs>	2002-05-29 09:09:57 (GMT)
commit	3a1941e7d007e93449aa22085687a8fafaddad7e (patch)
tree	9794bcbc0e094b9645add8339f0278666543ef7e /generic/tclCmdMZ.c
parent	514dd2a61babad4abc78895bdadec335dd9b4c71 (diff)
download	tcl-3a1941e7d007e93449aa22085687a8fafaddad7e.zip tcl-3a1941e7d007e93449aa22085687a8fafaddad7e.tar.gz tcl-3a1941e7d007e93449aa22085687a8fafaddad7e.tar.bz2