diff options
author | hobbs <hobbs> | 2000-05-08 21:59:58 (GMT) |
---|---|---|
committer | hobbs <hobbs> | 2000-05-08 21:59:58 (GMT) |
commit | 09f4c1de476f86324d54f2e8c31a66870ce1c8bc (patch) | |
tree | 025da577bdce141098365ffb242ca0ae0be52104 /generic/tclUtil.c | |
parent | 63adaf2eb6d8949c310ea3f93c699ed6dd1c8839 (diff) | |
download | tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.zip tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.tar.gz tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.tar.bz2 |
* doc/Utf.3:
* generic/tclStubInit.c:
* generic/tcl.decls:
* generic/tclDecls.h:
* generic/tclUtf.c: Added new functions Tcl_UniCharNcasecmp and
Tcl_UniCharCaseMatch (the Unicode counterpart of Tcl_StringCaseMatch).
* generic/tclUtil.c: Rewrote the Tcl_StringCaseMatch algorithm as an
optimization and made Tcl_StringMatch simply call Tcl_StringCaseMatch with nocase set to 0.
Diffstat (limited to 'generic/tclUtil.c')
-rw-r--r-- | generic/tclUtil.c | 189 |
1 file changed, 46 insertions, 143 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 6e99f32..8a2aa94 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtil.c,v 1.17 1999/12/12 02:26:43 hobbs Exp $ + * RCS: @(#) $Id: tclUtil.c,v 1.18 2000/05/08 21:59:59 hobbs Exp $ */ #include "tclInt.h" @@ -1136,131 +1136,7 @@ Tcl_StringMatch(string, pattern) CONST char *pattern; /* Pattern, which may contain special * characters. */ { - int p, s; - CONST char *pstart = pattern; - - while (1) { - p = *pattern; - s = *string; - - /* - * See if we're at the end of both the pattern and the string. If - * so, we succeeded. If we're at the end of the pattern but not at - * the end of the string, we failed. - */ - - if (p == '\0') { - if (s == '\0') { - return 1; - } else { - return 0; - } - } - if ((s == '\0') && (p != '*')) { - return 0; - } - - /* Check for a "*" as the next pattern character. It matches - * any substring. We handle this by calling ourselves - * recursively for each postfix of string, until either we - * match or we reach the end of the string. - */ - - if (p == '*') { - pattern++; - if (*pattern == '\0') { - return 1; - } - while (1) { - if (Tcl_StringMatch(string, pattern)) { - return 1; - } - if (*string == '\0') { - return 0; - } - string++; - } - } - - /* Check for a "?" as the next pattern character. It matches - * any single character. - */ - - if (p == '?') { - Tcl_UniChar ch; - - pattern++; - string += Tcl_UtfToUniChar(string, &ch); - continue; - } - - /* Check for a "[" as the next pattern character. It is followed - * by a list of characters that are acceptable, or by a range - * (two characters separated by "-"). 
- */ - - if (p == '[') { - Tcl_UniChar ch, startChar, endChar; - - pattern++; - string += Tcl_UtfToUniChar(string, &ch); - - while (1) { - if ((*pattern == ']') || (*pattern == '\0')) { - return 0; - } - pattern += Tcl_UtfToUniChar(pattern, &startChar); - if (*pattern == '-') { - pattern++; - if (*pattern == '\0') { - return 0; - } - pattern += Tcl_UtfToUniChar(pattern, &endChar); - if (((startChar <= ch) && (ch <= endChar)) - || ((endChar <= ch) && (ch <= startChar))) { - /* - * Matches ranges of form [a-z] or [z-a]. - */ - - break; - } - } else if (startChar == ch) { - break; - } - } - while (*pattern != ']') { - if (*pattern == '\0') { - pattern = Tcl_UtfPrev(pattern, pstart); - break; - } - pattern++; - } - pattern++; - continue; - } - - /* If the next pattern character is '\', just strip off the '\' - * so we do exact matching on the character that follows. - */ - - if (p == '\\') { - pattern++; - p = *pattern; - if (p == '\0') { - return 0; - } - } - - /* There's no special character. Just make sure that the next - * bytes of each string match. - */ - - if (s != p) { - return 0; - } - pattern++; - string++; - } + return Tcl_StringCaseMatch(string, pattern, 0); } /* @@ -1290,13 +1166,12 @@ Tcl_StringCaseMatch(string, pattern, nocase) * characters. */ int nocase; /* 0 for case sensitive, 1 for insensitive */ { - int p, s; + int p; CONST char *pstart = pattern; Tcl_UniChar ch1, ch2; while (1) { p = *pattern; - s = *string; /* * See if we're at the end of both the pattern and the string. If @@ -1305,35 +1180,61 @@ Tcl_StringCaseMatch(string, pattern, nocase) */ if (p == '\0') { - return (s == '\0'); + return (*string == '\0'); } - if ((s == '\0') && (p != '*')) { + if ((*string == '\0') && (p != '*')) { return 0; } - /* Check for a "*" as the next pattern character. It matches + /* + * Check for a "*" as the next pattern character. It matches * any substring. 
We handle this by calling ourselves * recursively for each postfix of string, until either we * match or we reach the end of the string. */ if (p == '*') { - pattern++; - if (*pattern == '\0') { + /* + * Skip all successive *'s in the pattern + */ + while (*(++pattern) == '*') {} + p = *pattern; + if (p == '\0') { return 1; } while (1) { + /* + * Optimization for matching - cruise through the string + * quickly if the next char in the pattern isn't a special + * character + */ + if ((p != '[') && (p != '?') && (p != '\\')) { + if (nocase) { + while (*string && (p != *string)) { + ch2 = Tcl_UtfToUniChar(string, &ch1); + if (p == Tcl_UniCharToLower(ch1)) { + break; + } + string += ch2; + } + } else { + while (*string && (p != *string)) { + string += Tcl_UtfToUniChar(string, &ch1); + } + } + } if (Tcl_StringCaseMatch(string, pattern, nocase)) { return 1; } if (*string == '\0') { return 0; } - string++; + string += Tcl_UtfToUniChar(string, &ch1); } } - /* Check for a "?" as the next pattern character. It matches + /* + * Check for a "?" as the next pattern character. It matches * any single character. */ @@ -1343,11 +1244,12 @@ Tcl_StringCaseMatch(string, pattern, nocase) continue; } - /* Check for a "[" as the next pattern character. It is followed + /* + * Check for a "[" as the next pattern character. It is followed * by a list of characters that are acceptable, or by a range * (two characters separated by "-"). */ - + if (p == '[') { Tcl_UniChar startChar, endChar; @@ -1396,22 +1298,23 @@ Tcl_StringCaseMatch(string, pattern, nocase) continue; } - /* If the next pattern character is '\', just strip off the '\' + /* + * If the next pattern character is '\', just strip off the '\' * so we do exact matching on the character that follows. */ - + if (p == '\\') { pattern++; - p = *pattern; - if (p == '\0') { + if (*pattern == '\0') { return 0; } } - /* There's no special character. Just make sure that the next + /* + * There's no special character. 
Just make sure that the next * bytes of each string match. */ - + string += Tcl_UtfToUniChar(string, &ch1); pattern += Tcl_UtfToUniChar(pattern, &ch2); if (nocase) { |