summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: aspect <aspect+tclcore@abstracted-spleen.org> 2017-05-06 00:36:49 (GMT)
committer: aspect <aspect+tclcore@abstracted-spleen.org> 2017-05-06 00:36:49 (GMT)
commit: e12f29cce2b1255cb3a6e788b67148d415a13f06 (patch)
tree: fdbc52dc37e410842489414695a4245630a43133
parent: a291109ed2c14d7a263b8b8132e2688266223c97 (diff)
download: tcl-e12f29cce2b1255cb3a6e788b67148d415a13f06.zip
download: tcl-e12f29cce2b1255cb3a6e788b67148d415a13f06.tar.gz
download: tcl-e12f29cce2b1255cb3a6e788b67148d415a13f06.tar.bz2
streamline StringCaseMatch with switch and comments. BEHAVIOUR CHANGE: incomplete bracket groups now match nothing
-rw-r--r-- generic/tclUtil.c 278
1 files changed, 117 insertions, 161 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 34d4be2..17a796e 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1920,206 +1920,162 @@ Tcl_StringCaseMatch(
{
int p, charLen;
CONST char *pstart = pattern;
- Tcl_UniChar ch1, ch2;
+ Tcl_UniChar pch, sch;
+ Tcl_UniChar startChar, endChar;
while (1) {
- p = *pattern;
-
- /*
- * See if we're at the end of both the pattern and the string. If so,
- * we succeeded. If we're at the end of the pattern but not at the end
- * of the string, we failed.
- */
+ switch (*pattern) {
+ case '\0':
+ return (*str == '\0');
- if (p == '\0') {
- return (*str == '\0');
- }
- if ((*str == '\0') && (p != '*')) {
- return 0;
- }
-
- /*
- * Check for a "*" as the next pattern character. It matches any
- * substring. We handle this by calling ourselves recursively for each
- * postfix of string, until either we match or we reach the end of the
- * string.
- */
-
- if (p == '*') {
- /*
- * Skip all successive *'s in the pattern
- */
+ case '?':
+ if (*str == '\0') {
+ return 0;
+ } else {
+ str += TclUtfToUniChar(str, &sch);
+ }
+ ++pattern;
+ break;
- while (*(++pattern) == '*') {}
- p = *pattern;
- if (p == '\0') {
- return 1;
- }
+ case '*':
+ /* skip runs of *** */
+ while (*(++pattern) == '*') {}
- /*
- * This is a special case optimization for single-byte utf.
- */
+ /* if end of pattern, we have a match */
+ if (*pattern == '\0') {
+ return 1;
+ }
- if (UCHAR(*pattern) < 0x80) {
- ch2 = (Tcl_UniChar)
+ /* peek at the next pattern char */
+ if (UCHAR(*pattern) < 0x80) {
+ pch = (Tcl_UniChar)
(nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
- } else {
- Tcl_UtfToUniChar(pattern, &ch2);
- if (nocase) {
- ch2 = Tcl_UniCharToLower(ch2);
+ } else {
+ TclUtfToUniChar(pattern, &pch);
+ if (nocase) {
+ pch = Tcl_UniCharToLower(pch);
+ }
}
- }
- while (1) {
- /*
- * Optimization for matching - cruise through the string
- * quickly if the next char in the pattern isn't a special
- * character
- */
-
- if ((p != '[') && (p != '?') && (p != '\\')) {
- if (nocase) {
+ /* if the next char in pattern is a literal, zoom through str to the next match */
+ switch (*pattern) {
+ case '[': case '?': case '\\':
+ break;
+ default:
while (*str) {
- charLen = TclUtfToUniChar(str, &ch1);
- if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
+ charLen = TclUtfToUniChar(str, &sch);
+ if ( (pch == sch)
+ || (nocase && (pch == Tcl_UniCharToLower(sch)))) {
break;
}
str += charLen;
}
- } else {
- /*
- * There's no point in trying to make this code
- * shorter, as the number of bytes you want to compare
- * each time is non-constant.
- */
+ }
- while (*str) {
- charLen = TclUtfToUniChar(str, &ch1);
- if (ch2 == ch1) {
- break;
- }
- str += charLen;
- }
+ while (*str != '\0') {
+ /* recursion! */
+ if (Tcl_StringCaseMatch(str, pattern, nocase)) {
+ return 1;
}
+ str += TclUtfToUniChar(str, &sch);
}
- if (Tcl_StringCaseMatch(str, pattern, nocase)) {
- return 1;
- }
- if (*str == '\0') {
- return 0;
- }
- str += TclUtfToUniChar(str, &ch1);
- }
- }
-
- /*
- * Check for a "?" as the next pattern character. It matches any
- * single character.
- */
-
- if (p == '?') {
- pattern++;
- str += TclUtfToUniChar(str, &ch1);
- continue;
- }
-
- /*
- * Check for a "[" as the next pattern character. It is followed by a
- * list of characters that are acceptable, or by a range (two
- * characters separated by "-").
- */
+ break;
- if (p == '[') {
- Tcl_UniChar startChar, endChar;
+ case '[':
+ ++pattern;
- pattern++;
- if (UCHAR(*str) < 0x80) {
- ch1 = (Tcl_UniChar)
+ if(UCHAR(*str) < 0x80) {
+ sch = (Tcl_UniChar)
(nocase ? tolower(UCHAR(*str)) : UCHAR(*str));
- str++;
- } else {
- str += Tcl_UtfToUniChar(str, &ch1);
- if (nocase) {
- ch1 = Tcl_UniCharToLower(ch1);
- }
- }
- while (1) {
- if ((*pattern == ']') || (*pattern == '\0')) {
- return 0;
- }
- if (UCHAR(*pattern) < 0x80) {
- startChar = (Tcl_UniChar) (nocase
- ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
- pattern++;
+ ++str;
} else {
- pattern += Tcl_UtfToUniChar(pattern, &startChar);
+ str += TclUtfToUniChar(str, &sch);
if (nocase) {
- startChar = Tcl_UniCharToLower(startChar);
+ sch = Tcl_UniCharToLower(sch);
}
}
- if (*pattern == '-') {
- pattern++;
+
+ while (1) {
+ if (*pattern == ']') {
+ /* end of range */
+ return 0;
+ }
if (*pattern == '\0') {
+ /* illegal pattern */
+ // WAS: break;
return 0;
}
if (UCHAR(*pattern) < 0x80) {
- endChar = (Tcl_UniChar) (nocase
- ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
- pattern++;
+ pch = (Tcl_UniChar)
+ (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
+ ++pattern;
} else {
- pattern += Tcl_UtfToUniChar(pattern, &endChar);
+ pattern += TclUtfToUniChar(pattern, &pch);
if (nocase) {
- endChar = Tcl_UniCharToLower(endChar);
+ pch = Tcl_UniCharToLower(pch);
}
}
- if (((startChar <= ch1) && (ch1 <= endChar))
- || ((endChar <= ch1) && (ch1 <= startChar))) {
- /*
- * Matches ranges of form [a-z] or [z-a].
- */
-
+ startChar = pch;
+ if (*pattern == '-') {
+ ++pattern;
+ if (*pattern == '\0') {
+ /* illegal pattern */
+ return 0;
+ }
+ if (UCHAR(*pattern) < 0x80) {
+ pch = (Tcl_UniChar)
+ (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern));
+ ++pattern;
+ } else {
+ pattern += TclUtfToUniChar(pattern, &pch);
+ if (nocase) {
+ pch = Tcl_UniCharToLower(pch);
+ }
+ }
+ endChar = pch;
+ if (((startChar <= sch) && (sch <= endChar))
+ || ((endChar <= sch) && (sch <= startChar))) {
+ /* matches ranges of form [a-z] or [z-a] */
+ break;
+ }
+ /* otherwise, process the rest of the [] */
+ continue;
+ } else if (startChar == sch) {
break;
}
- } else if (startChar == ch1) {
- break;
}
- }
- while (*pattern != ']') {
- if (*pattern == '\0') {
- pattern = Tcl_UtfPrev(pattern, pstart);
- break;
+ /* if we've matched in [], *pattern is still inside the brackets */
+ while (*pattern != ']') {
+ if (*pattern == '\0') {
+ /* illegal pattern */
+ // WAS: --pattern; break;
+ return 0;
+ }
+ pattern++;
}
pattern++;
- }
- pattern++;
- continue;
- }
-
- /*
- * If the next pattern character is '\', just strip off the '\' so we
- * do exact matching on the character that follows.
- */
-
- if (p == '\\') {
- pattern++;
- if (*pattern == '\0') {
- return 0;
- }
- }
+ break;
- /*
- * There's no special character. Just make sure that the next bytes of
- * each string match.
- */
+ case '\\':
+ ++pattern;
+ if (*pattern == '\0') {
+ /* illegal pattern */
+ return 0;
+ }
+ // fall through to literal match
- str += TclUtfToUniChar(str, &ch1);
- pattern += TclUtfToUniChar(pattern, &ch2);
- if (nocase) {
- if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
- return 0;
- }
- } else if (ch1 != ch2) {
- return 0;
+ default:
+ /* literal match */
+ str += TclUtfToUniChar(str, &sch);
+ pattern += TclUtfToUniChar(pattern, &pch);
+ if (nocase) {
+ if (Tcl_UniCharToLower(sch) != Tcl_UniCharToLower(pch)) {
+ return 0;
+ }
+ } else if (sch != pch) {
+ return 0;
+ }
+ break;
}
}
}