Deduplicate code in INST_STR_CMP, StringCmpCmd, and StringEqualCmd.

author: pooryorick <com.digitalsmarties@pooryorick.com> 2018-05-07 07:43:00 (GMT)
committer: pooryorick <com.digitalsmarties@pooryorick.com> 2018-05-07 07:43:00 (GMT)
commit: 5f0cf291f513b75b00ea3d59842b83dc24047c1c (patch)
tree: ef94a987d49542ad8de8301668d1bdb02e450b13 /generic
parent: db133f014426110646fe9631bab793e01cee6129 (diff)
download: tcl-5f0cf291f513b75b00ea3d59842b83dc24047c1c.zip
tcl-5f0cf291f513b75b00ea3d59842b83dc24047c1c.tar.gz
tcl-5f0cf291f513b75b00ea3d59842b83dc24047c1c.tar.bz2
3 files changed, 130 insertions, 245 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 433b9e8..bc798b7 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2599,10 +2599,8 @@ StringEqualCmd(
      * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
      */
 
-    const char *string1, *string2;
-    int length1, length2, i, match, length, nocase = 0, reqlength = -1;
-    typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
-    strCmpFn_t strCmpFn;
+    const char *string2;
+    int length2, i, match, nocase = 0, reqlength = -1;
 
     if (objc < 3 || objc > 6) {
     str_cmp_args:
@@ -2641,78 +2639,7 @@ StringEqualCmd(
 
     objv += objc-2;
 
-    if ((reqlength == 0) || (objv[0] == objv[1])) {
-	/*
-	 * Always match at 0 chars of if it is the same obj.
-	 */
-
-	Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1));
-	return TCL_OK;
-    }
-
-    if (!nocase && TclIsPureByteArray(objv[0]) &&
-	    TclIsPureByteArray(objv[1])) {
-	/*
-	 * Use binary versions of comparisons since that won't cause undue
-	 * type conversions and it is much faster. Only do this if we're
-	 * case-sensitive (which is all that really makes sense with byte
-	 * arrays anyway, and we have no memcasecmp() for some reason... :^)
-	 */
-
-	string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
-	string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
-	strCmpFn = (strCmpFn_t) memcmp;
-    } else if ((objv[0]->typePtr == &tclStringType)
-	    && (objv[1]->typePtr == &tclStringType)) {
-	/*
-	 * Do a unicode-specific comparison if both of the args are of String
-	 * type. In benchmark testing this proved the most efficient check
-	 * between the unicode and string comparison operations.
-	 */
-
-	string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
-	string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
-	strCmpFn = (strCmpFn_t)
-		(nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
-    } else {
-	/*
-	 * As a catch-all we will work with UTF-8. We cannot use memcmp() as
-	 * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
-	 * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
-	 * case-sensitive and no specific length was requested.
-	 */
-
-	string1 = (char *) TclGetStringFromObj(objv[0], &length1);
-	string2 = (char *) TclGetStringFromObj(objv[1], &length2);
-	if ((reqlength < 0) && !nocase) {
-	    strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
-	} else {
-	    length1 = Tcl_NumUtfChars(string1, length1);
-	    length2 = Tcl_NumUtfChars(string2, length2);
-	    strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
-	}
-    }
-
-    if ((reqlength < 0) && (length1 != length2)) {
-	match = 1;		/* This will be reversed below. */
-    } else {
-	length = (length1 < length2) ? length1 : length2;
-	if (reqlength > 0 && reqlength < length) {
-	    length = reqlength;
-	} else if (reqlength < 0) {
-	    /*
-	     * The requested length is negative, so we ignore it by setting it
-	     * to length + 1 so we correct the match var.
-	     */
-
-	    reqlength = length + 1;
-	}
-
-	match = strCmpFn(string1, string2, (unsigned) length);
-	if ((match == 0) && (reqlength > length)) {
-	    match = length1 - length2;
-	}
-    }
+    match = TclStringCmp (objv[0], objv[1], 0, nocase, reqlength);
 
     Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1));
     return TCL_OK;
@@ -2749,128 +2676,63 @@ StringCmpCmd(
      * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
      */
 
-    const char *string1, *string2;
-    int length1, length2, match, length, nocase, reqlength, status;
-    typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
-    strCmpFn_t strCmpFn;
+    int match, nocase, reqlength, status;
+
+    if ((status = TclStringCmpOpts(interp, objc, objv, &nocase, &reqlength))
+	!= TCL_OK) {
 
-    if ((status = TclStringCmpOpts(interp, objc, objv, &reqlength,
-	(char **)&string2, &length2, &nocase)) != TCL_OK){
 	return status;
     }
 
-    /*
-     * From now on, we only access the two objects at the end of the argument
-     * array.
-     */
     objv += objc-2;
-
-    if ((reqlength == 0) || (objv[0] == objv[1])) {
-	/*
-	 * Always match at 0 chars of if it is the same obj.
-	 */
-
-	Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0));
-	return TCL_OK;
-    }
-
-    if (!nocase && TclIsPureByteArray(objv[0]) &&
-	    TclIsPureByteArray(objv[1])) {
-
-	string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
-	string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
-	strCmpFn = (strCmpFn_t) memcmp;
-    } else if ((objv[0]->typePtr == &tclStringType)
-	    && (objv[1]->typePtr == &tclStringType)) {
-	/*
-	 * Do a unicode-specific comparison if both of the args are of String
-	 * type. In benchmark testing this proved the most efficient check
-	 * between the unicode and string comparison operations.
-	 */
-
-	string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
-	string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
-	strCmpFn = (strCmpFn_t)
-		(nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
-    } else {
-	/*
-	 * As a catch-all we will work with UTF-8. We cannot use memcmp() as
-	 * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
-	 * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
-	 * case-sensitive and no specific length was requested.
-	 */
-
-	string1 = (char *) TclGetStringFromObj(objv[0], &length1);
-	string2 = (char *) TclGetStringFromObj(objv[1], &length2);
-	if ((reqlength < 0) && !nocase) {
-	    strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
-	} else {
-	    length1 = Tcl_NumUtfChars(string1, length1);
-	    length2 = Tcl_NumUtfChars(string2, length2);
-	    strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
-	}
-    }
-
-    length = (length1 < length2) ? length1 : length2;
-    if (reqlength > 0 && reqlength < length) {
-	length = reqlength;
-    } else if (reqlength < 0) {
-	/*
-	 * The requested length is negative, so we ignore it by setting it to
-	 * length + 1 so we correct the match var.
-	 */
-
-	reqlength = length + 1;
-    }
-
-    match = strCmpFn(string1, string2, (unsigned) length);
-    if ((match == 0) && (reqlength > length)) {
-	match = length1 - length2;
-    }
-
-    Tcl_SetObjResult(interp,
-	    Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0));
+    match = TclStringCmp (objv[0], objv[1], 0, nocase, reqlength);
+    Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
     return TCL_OK;
 }
 
 int TclStringCmp (
 	Tcl_Obj *value1Ptr,
 	Tcl_Obj *value2Ptr,
-	int checkEq
+	int checkEq,        /* comparison is only for equality */
+	int nocase,	    /* comparison is not case sensitive */
+	int reqlength	    /* requested length */
 ) {
-	char *s1, *s2;
-	int empty, match, s1len, s2len;
-	memCmpFn_t memCmpFn;
+    char *s1, *s2;
+    int empty, length, match, s1len, s2len;
+    memCmpFn_t memCmpFn;
 
+    if ((reqlength == 0) || (value1Ptr == value2Ptr)) {
 	/*
-	 * When we have equal-length we can check only for (in)equality.
-	 * We can use memcmp in all (n)eq cases because we
-	 * don't need to worry about lexical LE/BE variance.
+	 * Always match at 0 chars or if it is the same obj.
 	 */
+	match = 0;
+    } else {
 
-	if (value1Ptr == value2Ptr) {
-	    match = 0;
-	} else {
-	    if (TclIsPureByteArray(value1Ptr)
-		    && TclIsPureByteArray(value2Ptr)) {
-		/*
-		 * Use binary versions of comparisons since that won't cause undue
-		 * type conversions and it is much faster. Only do this if we're
-		 * case-sensitive (which is all that really makes sense with byte
-		 * arrays anyway, and we have no memcasecmp() for some reason... :^)
-		 */
-		s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
-		s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
-		memCmpFn = memcmp;
-	    } else if ((value1Ptr->typePtr == &tclStringType)
-		    && (value2Ptr->typePtr == &tclStringType)) {
-		/*
-		 * Do a unicode-specific comparison if both of the args are of
-		 * String type. If the char length == byte length, we can do a
-		 * memcmp. In benchmark testing this proved the most efficient
-		 * check between the unicode and string comparison operations.
-		 */
+	if (!nocase && TclIsPureByteArray(value1Ptr)
+		&& TclIsPureByteArray(value2Ptr)) {
+	    /*
+	     * Use binary versions of comparisons since that won't cause undue
+	     * type conversions and it is much faster. Only do this if we're
+	     * case-sensitive (which is all that really makes sense with byte
+	     * arrays anyway, and we have no memcasecmp() for some reason... :^)
+	     */
+	    s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
+	    s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
+	    memCmpFn = memcmp;
+	} else if ((value1Ptr->typePtr == &tclStringType)
+		&& (value2Ptr->typePtr == &tclStringType)) {
+	    /*
+	     * Do a unicode-specific comparison if both of the args are of
+	     * String type. If the char length == byte length, we can do a
+	     * memcmp. In benchmark testing this proved the most efficient
+	     * check between the unicode and string comparison operations.
+	     */
 
+	    if (nocase) {
+		s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len);
+		s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len);
+		memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp;
+	    } else {
 		s1len = Tcl_GetCharLength(value1Ptr);
 		s2len = Tcl_GetCharLength(value2Ptr);
 		if ((s1len == value1Ptr->length)
@@ -2897,66 +2759,92 @@ int TclStringCmp (
 			memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
 		    }
 		}
-	    } else {
-		/*
-		 * In order to handle the special Tcl \xC0\x80 null encoding
-		 * for utf-8, strcmp can't do a simple memcmp.
-		 */
-
-		if ((empty = TclCheckEmptyString(value1Ptr)) > 0) {
-		    switch (TclCheckEmptyString(value2Ptr)) {
-			case -1:
-			s1 = "";
-			s1len = 0;
-			s2 = TclGetStringFromObj(value2Ptr, &s2len);
-			break;
-			case 0:
-			match = -1;
-			goto matchdone;
-			case 1:
-			match = 0;
-			goto matchdone;
-		    }
-		} else if (TclCheckEmptyString(value2Ptr) > 0) {
-		    switch (empty) {
-			case -1:
-			s2 = "";
-			s2len = 0;
-			s1 = TclGetStringFromObj(value1Ptr, &s1len);
-			break;
-			case 0:
-			match = 1;
-			goto matchdone;
-			case 1:
-			match = 0;
-			goto matchdone;
-		    }
-		} else {
-		    s1 = TclGetStringFromObj(value1Ptr, &s1len);
+	    }
+	} else {
+	    if ((empty = TclCheckEmptyString(value1Ptr)) > 0) {
+		switch (TclCheckEmptyString(value2Ptr)) {
+		    case -1:
+		    s1 = "";
+		    s1len = 0;
 		    s2 = TclGetStringFromObj(value2Ptr, &s2len);
+		    break;
+		    case 0:
+		    match = -1;
+		    goto matchdone;
+		    case 1:
+		    match = 0;
+		    goto matchdone;
 		}
-
-		if (checkEq) {
-		    memCmpFn = memcmp;
-		} else {
-		    memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
+	    } else if (TclCheckEmptyString(value2Ptr) > 0) {
+		switch (empty) {
+		    case -1:
+		    s2 = "";
+		    s2len = 0;
+		    s1 = TclGetStringFromObj(value1Ptr, &s1len);
+		    break;
+		    case 0:
+		    match = 1;
+		    goto matchdone;
+		    case 1:
+		    match = 0;
+		    goto matchdone;
 		}
+	    } else {
+		s1 = TclGetStringFromObj(value1Ptr, &s1len);
+		s2 = TclGetStringFromObj(value2Ptr, &s2len);
 	    }
+	    if (!nocase && checkEq) {
+		/*
+		 * When we have equal-length we can check only for (in)equality.
+		 * We can use memcmp in all (n)eq cases because we
+		 * don't need to worry about lexical LE/BE variance.
+		 */
+		memCmpFn = memcmp;
+	    } else {
 
-	    if (checkEq && (s1len != s2len)) {
-		match = 1;
-	    }  else {
 		/*
-		 * The comparison function should compare up to the minimum
-		 * byte length only.
+		 * As a catch-all we will work with UTF-8. We cannot use memcmp() as
+		 * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
+		 * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
+		 * case-sensitive and no specific length was requested.
 		 */
-		match = memCmpFn(s1, s2,
-			(size_t) ((s1len < s2len) ? s1len : s2len));
-		if (match == 0) {
-		    match = s1len - s2len;
+
+		if ((reqlength < 0) && !nocase) {
+		    memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
+		} else {
+		    s1len = Tcl_NumUtfChars(s1, s1len);
+		    s2len = Tcl_NumUtfChars(s2, s2len);
+		    memCmpFn = (memCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
 		}
 	    }
 	}
+
+	length = (s1len < s2len) ? s1len : s2len;
+	if (reqlength > 0 && reqlength < length) {
+	    length = reqlength;
+	} else if (reqlength < 0) {
+	    /*
+	     * The requested length is negative, so we ignore it by setting it to
+	     * length + 1 so we correct the match var.
+	     */
+
+	    reqlength = length + 1;
+	}
+
+	if (checkEq && (s1len != s2len)) {
+	    match = 1;		/* This will be reversed below. */
+	}  else {
+	    /*
+	     * The comparison function should compare only the first "length"
+	     * units: the shorter operand, capped by a positive reqlength.
+	     */
+	    match = memCmpFn(s1, s2, (size_t) length);
+	}
+	if ((match == 0) && (reqlength > length)) {
+	    match = s1len - s2len;
+	}
+	match = (match > 0) ? 1 : (match < 0) ? -1 : 0;
+    }
     matchdone:
     return match;
 }
@@ -2965,15 +2853,12 @@ int TclStringCmpOpts (
     Tcl_Interp *interp,		/* Current interpreter. */
     int objc,			/* Number of arguments. */
     Tcl_Obj *const objv[],	/* Argument objects. */
-    int *reqlength,
-    char **stringPtr,
-    int *length,
-    int *nocase
-
+    int *nocase,
+    int *reqlength
 )
 {
-    int i;
-    const char *string = *stringPtr;
+    int i, length;
+    const char *string;
 
     *reqlength = -1;
     *nocase = 0;
@@ -2985,11 +2870,11 @@ int TclStringCmpOpts (
     }
 
     for (i = 1; i < objc-2; i++) {
-	string = TclGetStringFromObj(objv[i], length);
-	if ((*length > 1) && !strncmp(string, "-nocase", (size_t)*length)) {
+	string = TclGetStringFromObj(objv[i], &length);
+	if ((length > 1) && !strncmp(string, "-nocase", (size_t)length)) {
 	    *nocase = 1;
-	} else if ((*length > 1)
-		&& !strncmp(string, "-length", (size_t)*length)) {
+	} else if ((length > 1)
+		&& !strncmp(string, "-length", (size_t)length)) {
 	    if (i+1 >= objc-2) {
 		goto str_cmp_args;
 	    }
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index a76e686..4c14514 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5387,7 +5387,7 @@ TEBCresume(
 	{
 	    int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ)
 		    || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ));
-	    match = TclStringCmp(valuePtr, value2Ptr, checkEq);
+	    match = TclStringCmp(valuePtr, value2Ptr, checkEq, 0, -1);
 	}
 
 	/*
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 67f53fd..0a3285f 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3158,9 +3158,9 @@ MODULE_SCOPE void *	TclStackRealloc(Tcl_Interp *interp, void *ptr,
 
 typedef int (*memCmpFn_t)(const void*, const void*, size_t);
 MODULE_SCOPE int	TclStringCmp (Tcl_Obj *value1Ptr, Tcl_Obj *value2Ptr,
-			    int checkEq);
+			    int checkEq, int nocase, int reqlength);
 MODULE_SCOPE int	TclStringCmpOpts (Tcl_Interp *interp, int objc, Tcl_Obj *const objv[],
-			    int *reqlength, char **stringPtr, int *length2, int *nocase);
+			    int *nocase, int *reqlength);
 MODULE_SCOPE int	TclStringMatch(const char *str, int strLen,
 			    const char *pattern, int ptnLen, int flags);
 MODULE_SCOPE int	TclStringMatchObj(Tcl_Obj *stringObj,
author	pooryorick <com.digitalsmarties@pooryorick.com>	2018-05-07 07:43:00 (GMT)
committer	pooryorick <com.digitalsmarties@pooryorick.com>	2018-05-07 07:43:00 (GMT)
commit	5f0cf291f513b75b00ea3d59842b83dc24047c1c (patch)
tree	ef94a987d49542ad8de8301668d1bdb02e450b13 /generic
parent	db133f014426110646fe9631bab793e01cee6129 (diff)
download	tcl-5f0cf291f513b75b00ea3d59842b83dc24047c1c.zip tcl-5f0cf291f513b75b00ea3d59842b83dc24047c1c.tar.gz tcl-5f0cf291f513b75b00ea3d59842b83dc24047c1c.tar.bz2