summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authorpooryorick <com.digitalsmarties@pooryorick.com>2018-05-07 07:43:00 (GMT)
committerpooryorick <com.digitalsmarties@pooryorick.com>2018-05-07 07:43:00 (GMT)
commit5f0cf291f513b75b00ea3d59842b83dc24047c1c (patch)
treeef94a987d49542ad8de8301668d1bdb02e450b13 /generic
parentdb133f014426110646fe9631bab793e01cee6129 (diff)
downloadtcl-5f0cf291f513b75b00ea3d59842b83dc24047c1c.zip
tcl-5f0cf291f513b75b00ea3d59842b83dc24047c1c.tar.gz
tcl-5f0cf291f513b75b00ea3d59842b83dc24047c1c.tar.bz2
Deduplicate code in INST_STR_CMP, StringCmpCmd, and StringEqualCmd.
Diffstat (limited to 'generic')
-rw-r--r--generic/tclCmdMZ.c369
-rw-r--r--generic/tclExecute.c2
-rw-r--r--generic/tclInt.h4
3 files changed, 130 insertions, 245 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 433b9e8..bc798b7 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2599,10 +2599,8 @@ StringEqualCmd(
* the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
*/
- const char *string1, *string2;
- int length1, length2, i, match, length, nocase = 0, reqlength = -1;
- typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
- strCmpFn_t strCmpFn;
+ const char *string2;
+ int length2, i, match, nocase = 0, reqlength = -1;
if (objc < 3 || objc > 6) {
str_cmp_args:
@@ -2641,78 +2639,7 @@ StringEqualCmd(
objv += objc-2;
- if ((reqlength == 0) || (objv[0] == objv[1])) {
- /*
- * Always match at 0 chars of if it is the same obj.
- */
-
- Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1));
- return TCL_OK;
- }
-
- if (!nocase && TclIsPureByteArray(objv[0]) &&
- TclIsPureByteArray(objv[1])) {
- /*
- * Use binary versions of comparisons since that won't cause undue
- * type conversions and it is much faster. Only do this if we're
- * case-sensitive (which is all that really makes sense with byte
- * arrays anyway, and we have no memcasecmp() for some reason... :^)
- */
-
- string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
- string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
- strCmpFn = (strCmpFn_t) memcmp;
- } else if ((objv[0]->typePtr == &tclStringType)
- && (objv[1]->typePtr == &tclStringType)) {
- /*
- * Do a unicode-specific comparison if both of the args are of String
- * type. In benchmark testing this proved the most efficient check
- * between the unicode and string comparison operations.
- */
-
- string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
- string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
- strCmpFn = (strCmpFn_t)
- (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
- } else {
- /*
- * As a catch-all we will work with UTF-8. We cannot use memcmp() as
- * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
- * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
- * case-sensitive and no specific length was requested.
- */
-
- string1 = (char *) TclGetStringFromObj(objv[0], &length1);
- string2 = (char *) TclGetStringFromObj(objv[1], &length2);
- if ((reqlength < 0) && !nocase) {
- strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
- } else {
- length1 = Tcl_NumUtfChars(string1, length1);
- length2 = Tcl_NumUtfChars(string2, length2);
- strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
- }
- }
-
- if ((reqlength < 0) && (length1 != length2)) {
- match = 1; /* This will be reversed below. */
- } else {
- length = (length1 < length2) ? length1 : length2;
- if (reqlength > 0 && reqlength < length) {
- length = reqlength;
- } else if (reqlength < 0) {
- /*
- * The requested length is negative, so we ignore it by setting it
- * to length + 1 so we correct the match var.
- */
-
- reqlength = length + 1;
- }
-
- match = strCmpFn(string1, string2, (unsigned) length);
- if ((match == 0) && (reqlength > length)) {
- match = length1 - length2;
- }
- }
+ match = TclStringCmp (objv[0], objv[1], 0, nocase, reqlength);
Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1));
return TCL_OK;
@@ -2749,128 +2676,63 @@ StringCmpCmd(
* the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
*/
- const char *string1, *string2;
- int length1, length2, match, length, nocase, reqlength, status;
- typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
- strCmpFn_t strCmpFn;
+ int match, nocase, reqlength, status;
+
+ if ((status = TclStringCmpOpts(interp, objc, objv, &nocase, &reqlength))
+ != TCL_OK) {
- if ((status = TclStringCmpOpts(interp, objc, objv, &reqlength,
- (char **)&string2, &length2, &nocase)) != TCL_OK){
return status;
}
- /*
- * From now on, we only access the two objects at the end of the argument
- * array.
- */
objv += objc-2;
-
- if ((reqlength == 0) || (objv[0] == objv[1])) {
- /*
- * Always match at 0 chars of if it is the same obj.
- */
-
- Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0));
- return TCL_OK;
- }
-
- if (!nocase && TclIsPureByteArray(objv[0]) &&
- TclIsPureByteArray(objv[1])) {
-
- string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
- string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
- strCmpFn = (strCmpFn_t) memcmp;
- } else if ((objv[0]->typePtr == &tclStringType)
- && (objv[1]->typePtr == &tclStringType)) {
- /*
- * Do a unicode-specific comparison if both of the args are of String
- * type. In benchmark testing this proved the most efficient check
- * between the unicode and string comparison operations.
- */
-
- string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
- string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
- strCmpFn = (strCmpFn_t)
- (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
- } else {
- /*
- * As a catch-all we will work with UTF-8. We cannot use memcmp() as
- * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
- * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
- * case-sensitive and no specific length was requested.
- */
-
- string1 = (char *) TclGetStringFromObj(objv[0], &length1);
- string2 = (char *) TclGetStringFromObj(objv[1], &length2);
- if ((reqlength < 0) && !nocase) {
- strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
- } else {
- length1 = Tcl_NumUtfChars(string1, length1);
- length2 = Tcl_NumUtfChars(string2, length2);
- strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
- }
- }
-
- length = (length1 < length2) ? length1 : length2;
- if (reqlength > 0 && reqlength < length) {
- length = reqlength;
- } else if (reqlength < 0) {
- /*
- * The requested length is negative, so we ignore it by setting it to
- * length + 1 so we correct the match var.
- */
-
- reqlength = length + 1;
- }
-
- match = strCmpFn(string1, string2, (unsigned) length);
- if ((match == 0) && (reqlength > length)) {
- match = length1 - length2;
- }
-
- Tcl_SetObjResult(interp,
- Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0));
+ match = TclStringCmp (objv[0], objv[1], 0, nocase, reqlength);
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
return TCL_OK;
}
int TclStringCmp (
Tcl_Obj *value1Ptr,
Tcl_Obj *value2Ptr,
- int checkEq
+ int checkEq, /* comparison is only for equality */
+ int nocase, /* comparison is not case sensitive */
+ int reqlength /* requested length */
) {
- char *s1, *s2;
- int empty, match, s1len, s2len;
- memCmpFn_t memCmpFn;
+ char *s1, *s2;
+ int empty, length, match, s1len, s2len;
+ memCmpFn_t memCmpFn;
+ if ((reqlength == 0) || (value1Ptr == value2Ptr)) {
/*
- * When we have equal-length we can check only for (in)equality.
- * We can use memcmp in all (n)eq cases because we
- * don't need to worry about lexical LE/BE variance.
+ * Always match at 0 chars of if it is the same obj.
*/
+ match = 0;
+ } else {
- if (value1Ptr == value2Ptr) {
- match = 0;
- } else {
- if (TclIsPureByteArray(value1Ptr)
- && TclIsPureByteArray(value2Ptr)) {
- /*
- * Use binary versions of comparisons since that won't cause undue
- * type conversions and it is much faster. Only do this if we're
- * case-sensitive (which is all that really makes sense with byte
- * arrays anyway, and we have no memcasecmp() for some reason... :^)
- */
- s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
- s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
- memCmpFn = memcmp;
- } else if ((value1Ptr->typePtr == &tclStringType)
- && (value2Ptr->typePtr == &tclStringType)) {
- /*
- * Do a unicode-specific comparison if both of the args are of
- * String type. If the char length == byte length, we can do a
- * memcmp. In benchmark testing this proved the most efficient
- * check between the unicode and string comparison operations.
- */
+ if (!nocase && TclIsPureByteArray(value1Ptr)
+ && TclIsPureByteArray(value2Ptr)) {
+ /*
+ * Use binary versions of comparisons since that won't cause undue
+ * type conversions and it is much faster. Only do this if we're
+ * case-sensitive (which is all that really makes sense with byte
+ * arrays anyway, and we have no memcasecmp() for some reason... :^)
+ */
+ s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
+ s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
+ memCmpFn = memcmp;
+ } else if ((value1Ptr->typePtr == &tclStringType)
+ && (value2Ptr->typePtr == &tclStringType)) {
+ /*
+ * Do a unicode-specific comparison if both of the args are of
+ * String type. If the char length == byte length, we can do a
+ * memcmp. In benchmark testing this proved the most efficient
+ * check between the unicode and string comparison operations.
+ */
+ if (nocase) {
+ s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len);
+ s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len);
+ memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp;
+ } else {
s1len = Tcl_GetCharLength(value1Ptr);
s2len = Tcl_GetCharLength(value2Ptr);
if ((s1len == value1Ptr->length)
@@ -2897,66 +2759,92 @@ int TclStringCmp (
memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
}
}
- } else {
- /*
- * In order to handle the special Tcl \xC0\x80 null encoding
- * for utf-8, strcmp can't do a simple memcmp.
- */
-
- if ((empty = TclCheckEmptyString(value1Ptr)) > 0) {
- switch (TclCheckEmptyString(value2Ptr)) {
- case -1:
- s1 = "";
- s1len = 0;
- s2 = TclGetStringFromObj(value2Ptr, &s2len);
- break;
- case 0:
- match = -1;
- goto matchdone;
- case 1:
- match = 0;
- goto matchdone;
- }
- } else if (TclCheckEmptyString(value2Ptr) > 0) {
- switch (empty) {
- case -1:
- s2 = "";
- s2len = 0;
- s1 = TclGetStringFromObj(value1Ptr, &s1len);
- break;
- case 0:
- match = 1;
- goto matchdone;
- case 1:
- match = 0;
- goto matchdone;
- }
- } else {
- s1 = TclGetStringFromObj(value1Ptr, &s1len);
+ }
+ } else {
+ if ((empty = TclCheckEmptyString(value1Ptr)) > 0) {
+ switch (TclCheckEmptyString(value2Ptr)) {
+ case -1:
+ s1 = "";
+ s1len = 0;
s2 = TclGetStringFromObj(value2Ptr, &s2len);
+ break;
+ case 0:
+ match = -1;
+ goto matchdone;
+ case 1:
+ match = 0;
+ goto matchdone;
}
-
- if (checkEq) {
- memCmpFn = memcmp;
- } else {
- memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
+ } else if (TclCheckEmptyString(value2Ptr) > 0) {
+ switch (empty) {
+ case -1:
+ s2 = "";
+ s2len = 0;
+ s1 = TclGetStringFromObj(value1Ptr, &s1len);
+ break;
+ case 0:
+ match = 1;
+ goto matchdone;
+ case 1:
+ match = 0;
+ goto matchdone;
}
+ } else {
+ s1 = TclGetStringFromObj(value1Ptr, &s1len);
+ s2 = TclGetStringFromObj(value2Ptr, &s2len);
}
+ if (!nocase && checkEq) {
+ /*
+ * When we have equal-length we can check only for (in)equality.
+ * We can use memcmp in all (n)eq cases because we
+ * don't need to worry about lexical LE/BE variance.
+ */
+ memCmpFn = memcmp;
+ } else {
- if (checkEq && (s1len != s2len)) {
- match = 1;
- } else {
/*
- * The comparison function should compare up to the minimum
- * byte length only.
+ * As a catch-all we will work with UTF-8. We cannot use memcmp() as
+ * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
+ * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
+ * case-sensitive and no specific length was requested.
*/
- match = memCmpFn(s1, s2,
- (size_t) ((s1len < s2len) ? s1len : s2len));
- if (match == 0) {
- match = s1len - s2len;
+
+ if ((reqlength < 0) && !nocase) {
+ memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
+ } else {
+ s1len = Tcl_NumUtfChars(s1, s1len);
+ s2len = Tcl_NumUtfChars(s2, s2len);
+ memCmpFn = (memCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
}
}
}
+
+ length = (s1len < s2len) ? s1len : s2len;
+ if (reqlength > 0 && reqlength < length) {
+ length = reqlength;
+ } else if (reqlength < 0) {
+ /*
+ * The requested length is negative, so we ignore it by setting it to
+ * length + 1 so we correct the match var.
+ */
+
+ reqlength = length + 1;
+ }
+
+ if (checkEq && (s1len != s2len)) {
+ match = 1; /* This will be reversed below. */
+ } else {
+ /*
+ * The comparison function should compare up to the minimum
+ * byte length only.
+ */
+ match = memCmpFn(s1, s2, (size_t) length);
+ }
+ if ((match == 0) && (reqlength > length)) {
+ match = s1len - s2len;
+ }
+ match = (match > 0) ? 1 : (match < 0) ? -1 : 0;
+ }
matchdone:
return match;
}
@@ -2965,15 +2853,12 @@ int TclStringCmpOpts (
Tcl_Interp *interp, /* Current interpreter. */
int objc, /* Number of arguments. */
Tcl_Obj *const objv[], /* Argument objects. */
- int *reqlength,
- char **stringPtr,
- int *length,
- int *nocase
-
+ int *nocase,
+ int *reqlength
)
{
- int i;
- const char *string = *stringPtr;
+ int i, length;
+ const char *string;
*reqlength = -1;
*nocase = 0;
@@ -2985,11 +2870,11 @@ int TclStringCmpOpts (
}
for (i = 1; i < objc-2; i++) {
- string = TclGetStringFromObj(objv[i], length);
- if ((*length > 1) && !strncmp(string, "-nocase", (size_t)*length)) {
+ string = TclGetStringFromObj(objv[i], &length);
+ if ((length > 1) && !strncmp(string, "-nocase", (size_t)length)) {
*nocase = 1;
- } else if ((*length > 1)
- && !strncmp(string, "-length", (size_t)*length)) {
+ } else if ((length > 1)
+ && !strncmp(string, "-length", (size_t)length)) {
if (i+1 >= objc-2) {
goto str_cmp_args;
}
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index a76e686..4c14514 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5387,7 +5387,7 @@ TEBCresume(
{
int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ)
|| (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ));
- match = TclStringCmp(valuePtr, value2Ptr, checkEq);
+ match = TclStringCmp(valuePtr, value2Ptr, checkEq, 0, -1);
}
/*
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 67f53fd..0a3285f 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3158,9 +3158,9 @@ MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr,
typedef int (*memCmpFn_t)(const void*, const void*, size_t);
MODULE_SCOPE int TclStringCmp (Tcl_Obj *value1Ptr, Tcl_Obj *value2Ptr,
- int checkEq);
+ int checkEq, int nocase, int reqlength);
MODULE_SCOPE int TclStringCmpOpts (Tcl_Interp *interp, int objc, Tcl_Obj *const objv[],
- int *reqlength, char **stringPtr, int *length2, int *nocase);
+ int *nocase, int *reqlength);
MODULE_SCOPE int TclStringMatch(const char *str, int strLen,
const char *pattern, int ptnLen, int flags);
MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj,