summary | refs | log | tree | commit | diff | stats
path: root/generic/tclUtil.c
diff options
context:
space:
mode:
author: hobbs <hobbs>, 2000-05-08 21:59:58 (GMT)
committer: hobbs <hobbs>, 2000-05-08 21:59:58 (GMT)
commit: 09f4c1de476f86324d54f2e8c31a66870ce1c8bc (patch)
tree: 025da577bdce141098365ffb242ca0ae0be52104 /generic/tclUtil.c
parent: 63adaf2eb6d8949c310ea3f93c699ed6dd1c8839 (diff)
download: tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.zip
          tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.tar.gz
          tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.tar.bz2
* doc/Utf.3:
* generic/tclStubInit.c:
* generic/tcl.decls:
* generic/tclDecls.h:
* generic/tclUtf.c: Added new functions Tcl_UniCharNcasecmp and Tcl_UniCharCaseMatch (Unicode parallel to Tcl_StringCaseMatch).
* generic/tclUtil.c: Rewrote the Tcl_StringCaseMatch algorithm for optimization and made Tcl_StringMatch just call Tcl_StringCaseMatch.
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r-- generic/tclUtil.c 189
1 file changed, 46 insertions, 143 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 6e99f32..8a2aa94 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtil.c,v 1.17 1999/12/12 02:26:43 hobbs Exp $
+ * RCS: @(#) $Id: tclUtil.c,v 1.18 2000/05/08 21:59:59 hobbs Exp $
*/
#include "tclInt.h"
@@ -1136,131 +1136,7 @@ Tcl_StringMatch(string, pattern)
CONST char *pattern; /* Pattern, which may contain special
* characters. */
{
- int p, s;
- CONST char *pstart = pattern;
-
- while (1) {
- p = *pattern;
- s = *string;
-
- /*
- * See if we're at the end of both the pattern and the string. If
- * so, we succeeded. If we're at the end of the pattern but not at
- * the end of the string, we failed.
- */
-
- if (p == '\0') {
- if (s == '\0') {
- return 1;
- } else {
- return 0;
- }
- }
- if ((s == '\0') && (p != '*')) {
- return 0;
- }
-
- /* Check for a "*" as the next pattern character. It matches
- * any substring. We handle this by calling ourselves
- * recursively for each postfix of string, until either we
- * match or we reach the end of the string.
- */
-
- if (p == '*') {
- pattern++;
- if (*pattern == '\0') {
- return 1;
- }
- while (1) {
- if (Tcl_StringMatch(string, pattern)) {
- return 1;
- }
- if (*string == '\0') {
- return 0;
- }
- string++;
- }
- }
-
- /* Check for a "?" as the next pattern character. It matches
- * any single character.
- */
-
- if (p == '?') {
- Tcl_UniChar ch;
-
- pattern++;
- string += Tcl_UtfToUniChar(string, &ch);
- continue;
- }
-
- /* Check for a "[" as the next pattern character. It is followed
- * by a list of characters that are acceptable, or by a range
- * (two characters separated by "-").
- */
-
- if (p == '[') {
- Tcl_UniChar ch, startChar, endChar;
-
- pattern++;
- string += Tcl_UtfToUniChar(string, &ch);
-
- while (1) {
- if ((*pattern == ']') || (*pattern == '\0')) {
- return 0;
- }
- pattern += Tcl_UtfToUniChar(pattern, &startChar);
- if (*pattern == '-') {
- pattern++;
- if (*pattern == '\0') {
- return 0;
- }
- pattern += Tcl_UtfToUniChar(pattern, &endChar);
- if (((startChar <= ch) && (ch <= endChar))
- || ((endChar <= ch) && (ch <= startChar))) {
- /*
- * Matches ranges of form [a-z] or [z-a].
- */
-
- break;
- }
- } else if (startChar == ch) {
- break;
- }
- }
- while (*pattern != ']') {
- if (*pattern == '\0') {
- pattern = Tcl_UtfPrev(pattern, pstart);
- break;
- }
- pattern++;
- }
- pattern++;
- continue;
- }
-
- /* If the next pattern character is '\', just strip off the '\'
- * so we do exact matching on the character that follows.
- */
-
- if (p == '\\') {
- pattern++;
- p = *pattern;
- if (p == '\0') {
- return 0;
- }
- }
-
- /* There's no special character. Just make sure that the next
- * bytes of each string match.
- */
-
- if (s != p) {
- return 0;
- }
- pattern++;
- string++;
- }
+ return Tcl_StringCaseMatch(string, pattern, 0);
}
/*
@@ -1290,13 +1166,12 @@ Tcl_StringCaseMatch(string, pattern, nocase)
* characters. */
int nocase; /* 0 for case sensitive, 1 for insensitive */
{
- int p, s;
+ int p;
CONST char *pstart = pattern;
Tcl_UniChar ch1, ch2;
while (1) {
p = *pattern;
- s = *string;
/*
* See if we're at the end of both the pattern and the string. If
@@ -1305,35 +1180,61 @@ Tcl_StringCaseMatch(string, pattern, nocase)
*/
if (p == '\0') {
- return (s == '\0');
+ return (*string == '\0');
}
- if ((s == '\0') && (p != '*')) {
+ if ((*string == '\0') && (p != '*')) {
return 0;
}
- /* Check for a "*" as the next pattern character. It matches
+ /*
+ * Check for a "*" as the next pattern character. It matches
* any substring. We handle this by calling ourselves
* recursively for each postfix of string, until either we
* match or we reach the end of the string.
*/
if (p == '*') {
- pattern++;
- if (*pattern == '\0') {
+ /*
+ * Skip all successive *'s in the pattern
+ */
+ while (*(++pattern) == '*') {}
+ p = *pattern;
+ if (p == '\0') {
return 1;
}
while (1) {
+ /*
+ * Optimization for matching - cruise through the string
+ * quickly if the next char in the pattern isn't a special
+ * character
+ */
+ if ((p != '[') && (p != '?') && (p != '\\')) {
+ if (nocase) {
+ while (*string && (p != *string)) {
+ ch2 = Tcl_UtfToUniChar(string, &ch1);
+ if (p == Tcl_UniCharToLower(ch1)) {
+ break;
+ }
+ string += ch2;
+ }
+ } else {
+ while (*string && (p != *string)) {
+ string += Tcl_UtfToUniChar(string, &ch1);
+ }
+ }
+ }
if (Tcl_StringCaseMatch(string, pattern, nocase)) {
return 1;
}
if (*string == '\0') {
return 0;
}
- string++;
+ string += Tcl_UtfToUniChar(string, &ch1);
}
}
- /* Check for a "?" as the next pattern character. It matches
+ /*
+ * Check for a "?" as the next pattern character. It matches
* any single character.
*/
@@ -1343,11 +1244,12 @@ Tcl_StringCaseMatch(string, pattern, nocase)
continue;
}
- /* Check for a "[" as the next pattern character. It is followed
+ /*
+ * Check for a "[" as the next pattern character. It is followed
* by a list of characters that are acceptable, or by a range
* (two characters separated by "-").
*/
-
+
if (p == '[') {
Tcl_UniChar startChar, endChar;
@@ -1396,22 +1298,23 @@ Tcl_StringCaseMatch(string, pattern, nocase)
continue;
}
- /* If the next pattern character is '\', just strip off the '\'
+ /*
+ * If the next pattern character is '\', just strip off the '\'
* so we do exact matching on the character that follows.
*/
-
+
if (p == '\\') {
pattern++;
- p = *pattern;
- if (p == '\0') {
+ if (*pattern == '\0') {
return 0;
}
}
- /* There's no special character. Just make sure that the next
+ /*
+ * There's no special character. Just make sure that the next
* bytes of each string match.
*/
-
+
string += Tcl_UtfToUniChar(string, &ch1);
pattern += Tcl_UtfToUniChar(pattern, &ch2);
if (nocase) {