* doc/Utf.3:

* generic/tclStubInit.c: * generic/tcl.decls: * generic/tclDecls.h: * generic/tclUtf.c: Added new functions Tcl_UniCharNcasecmp and Tcl_UniCharCaseMatch (unicode parallel to Tcl_StringCaseMatch) * generic/tclUtil.c: rewrote Tcl_StringCaseMatch algorithm for optimization and made Tcl_StringMatch just call Tcl_StringCaseMatch
author: hobbs <hobbs> 2000-05-08 21:59:58 (GMT)
committer: hobbs <hobbs> 2000-05-08 21:59:58 (GMT)
commit: 09f4c1de476f86324d54f2e8c31a66870ce1c8bc (patch)
tree: 025da577bdce141098365ffb242ca0ae0be52104 /generic
parent: 63adaf2eb6d8949c310ea3f93c699ed6dd1c8839 (diff)
download: tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.zip
tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.tar.gz
tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.tar.bz2
5 files changed, 292 insertions, 148 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 1f09cfa..39a6c21 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -10,7 +10,7 @@
 # See the file "license.terms" for information on usage and redistribution
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 # 
-# RCS: @(#) $Id: tcl.decls,v 1.34 2000/05/02 22:02:33 kupries Exp $
+# RCS: @(#) $Id: tcl.decls,v 1.35 2000/05/08 21:59:58 hobbs Exp $
 
 library tcl
 
@@ -1379,6 +1379,14 @@ declare 400 generic {
     int Tcl_IsChannelExisting (CONST char* channelName)
 }
 
+declare 401 generic {
+    int Tcl_UniCharNcasecmp(CONST Tcl_UniChar *cs, CONST Tcl_UniChar *ct,\
+	    unsigned long n)
+}
+declare 402 generic {
+    int Tcl_UniCharCaseMatch(CONST Tcl_UniChar *ustr, \
+	    CONST Tcl_UniChar *pattern, int nocase)
+}
 
 ##############################################################################
 
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 150d471..56c1d76 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclDecls.h,v 1.35 2000/05/02 22:02:33 kupries Exp $
+ * RCS: @(#) $Id: tclDecls.h,v 1.36 2000/05/08 21:59:59 hobbs Exp $
  */
 
 #ifndef _TCLDECLS
@@ -1246,6 +1246,14 @@ EXTERN void		Tcl_ClearChannelHandlers _ANSI_ARGS_((
 /* 400 */
 EXTERN int		Tcl_IsChannelExisting _ANSI_ARGS_((
 				CONST char* channelName));
+/* 401 */
+EXTERN int		Tcl_UniCharNcasecmp _ANSI_ARGS_((
+				CONST Tcl_UniChar * cs, 
+				CONST Tcl_UniChar * ct, unsigned long n));
+/* 402 */
+EXTERN int		Tcl_UniCharCaseMatch _ANSI_ARGS_((
+				CONST Tcl_UniChar * ustr, 
+				CONST Tcl_UniChar * pattern, int nocase));
 
 typedef struct TclStubHooks {
     struct TclPlatStubs *tclPlatStubs;
@@ -1714,6 +1722,8 @@ typedef struct TclStubs {
     void (*tcl_SpliceChannel) _ANSI_ARGS_((Tcl_Channel channel)); /* 398 */
     void (*tcl_ClearChannelHandlers) _ANSI_ARGS_((Tcl_Channel channel)); /* 399 */
     int (*tcl_IsChannelExisting) _ANSI_ARGS_((CONST char* channelName)); /* 400 */
+    int (*tcl_UniCharNcasecmp) _ANSI_ARGS_((CONST Tcl_UniChar * cs, CONST Tcl_UniChar * ct, unsigned long n)); /* 401 */
+    int (*tcl_UniCharCaseMatch) _ANSI_ARGS_((CONST Tcl_UniChar * ustr, CONST Tcl_UniChar * pattern, int nocase)); /* 402 */
 } TclStubs;
 
 #ifdef __cplusplus
@@ -3363,6 +3373,14 @@ extern TclStubs *tclStubsPtr;
 #define Tcl_IsChannelExisting \
 	(tclStubsPtr->tcl_IsChannelExisting) /* 400 */
 #endif
+#ifndef Tcl_UniCharNcasecmp
+#define Tcl_UniCharNcasecmp \
+	(tclStubsPtr->tcl_UniCharNcasecmp) /* 401 */
+#endif
+#ifndef Tcl_UniCharCaseMatch
+#define Tcl_UniCharCaseMatch \
+	(tclStubsPtr->tcl_UniCharCaseMatch) /* 402 */
+#endif
 
 #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
 
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index e9d6577..c2ac1e5 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclStubInit.c,v 1.36 2000/05/02 22:02:35 kupries Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.37 2000/05/08 21:59:59 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -798,6 +798,8 @@ TclStubs tclStubs = {
     Tcl_SpliceChannel, /* 398 */
     Tcl_ClearChannelHandlers, /* 399 */
     Tcl_IsChannelExisting, /* 400 */
+    Tcl_UniCharNcasecmp, /* 401 */
+    Tcl_UniCharCaseMatch, /* 402 */
 };
 
 /* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 5fe3c41..b62a26c 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclUtf.c,v 1.11 2000/01/11 22:09:00 hobbs Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.12 2000/05/08 21:59:58 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -1301,7 +1301,43 @@ Tcl_UniCharNcmp(cs, ct, n)
 {
     for ( ; n != 0; n--, cs++, ct++) {
 	if (*cs != *ct) {
-	    return *cs - *ct;
+	    return (*cs - *ct);
+	}
+	if (*cs == '\0') {
+	    break;
+	}
+    }
+    return 0;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharNcasecmp --
+ *
+ *	Compare at most n unichars of string cs to string ct case
+ *	insensitive.  Both cs and ct are assumed to be at least n
+ *	unichars long.
+ *
+ * Results:
+ *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharNcasecmp(cs, ct, n)
+    CONST Tcl_UniChar *cs;		/* Unicode string to compare to ct. */
+    CONST Tcl_UniChar *ct;		/* Unicode string cs is compared to. */
+    unsigned long n;			/* Number of unichars to compare. */
+{
+    for ( ; n != 0; n--, cs++, ct++) {
+	if ((*cs != *ct) &&
+		(Tcl_UniCharToLower(*cs) != Tcl_UniCharToLower(*ct))) {
+	    return (*cs - *ct);
 	}
 	if (*cs == '\0') {
 	    break;
@@ -1584,3 +1620,180 @@ Tcl_UniCharIsWordChar(ch)
 
     return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1);
 }
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharCaseMatch --
+ *
+ *	See if a particular Unicode string matches a particular pattern.
+ *	Allows case insensitivity.  Thie is the Unicode equivalent of
+ *	the char* Tcl_StringCaseMatch.
+ *
+ * Results:
+ *	The return value is 1 if string matches pattern, and
+ *	0 otherwise.  The matching operation permits the following
+ *	special characters in the pattern: *?\[] (see the manual
+ *	entry for details on what these mean).
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharCaseMatch(string, pattern, nocase)
+    CONST Tcl_UniChar *string;	/* Unicode String. */
+    CONST Tcl_UniChar *pattern;	/* Pattern, which may contain special
+				 * characters. */
+    int nocase;			/* 0 for case sensitive, 1 for insensitive */
+{
+    Tcl_UniChar ch1, p;
+    
+    while (1) {
+	p = *pattern;
+	
+	/*
+	 * See if we're at the end of both the pattern and the string.  If
+	 * so, we succeeded.  If we're at the end of the pattern but not at
+	 * the end of the string, we failed.
+	 */
+	
+	if (p == 0) {
+	    return (*string == 0);
+	}
+	if ((*string == 0) && (p != '*')) {
+	    return 0;
+	}
+
+	/*
+	 * Check for a "*" as the next pattern character.  It matches any
+	 * substring.  We handle this by skipping all the characters up to the
+	 * next matching one in the pattern, and then calling ourselves
+	 * recursively for each postfix of string, until either we match or we
+	 * reach the end of the string.
+	 */
+	
+	if (p == '*') {
+	    int pSpecial;
+	    /*
+	     * Skip all successive *'s in the pattern
+	     */
+	    while (*(++pattern) == '*') {}
+	    p = *pattern;
+	    if (p == 0) {
+		return 1;
+	    }
+	    while (1) {
+		/*
+		 * Optimization for matching - cruise through the string
+		 * quickly if the next char in the pattern isn't a special
+		 * character
+		 */
+		if ((p != '[') && (p != '?') && (p != '\\')) {
+		    if (nocase) {
+			while (*string && (p != *string)
+				&& (p != Tcl_UniCharToLower(*string))) {
+			    string++;
+			}
+		    } else {
+			while (*string && (p != *string)) { string++; }
+		    }
+		}
+		if (Tcl_UniCharCaseMatch(string, pattern, nocase)) {
+		    return 1;
+		}
+		if (*string == 0) {
+		    return 0;
+		}
+		string++;
+	    }
+	}
+
+	/*
+	 * Check for a "?" as the next pattern character.  It matches
+	 * any single character.
+	 */
+
+	if (p == '?') {
+	    pattern++;
+	    string++;
+	    continue;
+	}
+
+	/*
+	 * Check for a "[" as the next pattern character.  It is followed
+	 * by a list of characters that are acceptable, or by a range
+	 * (two characters separated by "-").
+	 */
+	
+	if (p == '[') {
+	    Tcl_UniChar startChar, endChar;
+
+	    pattern++;
+	    ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
+	    string++;
+	    while (1) {
+		if ((*pattern == ']') || (*pattern == 0)) {
+		    return 0;
+		}
+		startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
+		pattern++;
+		if (*pattern == '-') {
+		    pattern++;
+		    if (*pattern == 0) {
+			return 0;
+		    }
+		    endChar = (nocase ? Tcl_UniCharToLower(*pattern)
+			    : *pattern);
+		    pattern++;
+		    if (((startChar <= ch1) && (ch1 <= endChar))
+			    || ((endChar <= ch1) && (ch1 <= startChar))) {
+			/*
+			 * Matches ranges of form [a-z] or [z-a].
+			 */
+			break;
+		    }
+		} else if (startChar == ch1) {
+		    break;
+		}
+	    }
+	    while (*pattern != ']') {
+		if (*pattern == 0) {
+		    pattern--;
+		    break;
+		}
+		pattern++;
+	    }
+	    pattern++;
+	    continue;
+	}
+
+	/*
+	 * If the next pattern character is '\', just strip off the '\'
+	 * so we do exact matching on the character that follows.
+	 */
+
+	if (p == '\\') {
+	    if (*(++pattern) == '\0') {
+		return 0;
+	    }
+	}
+
+	/*
+	 * There's no special character.  Just make sure that the next
+	 * bytes of each string match.
+	 */
+
+	if (nocase) {
+	    if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {
+		return 0;
+	    }
+	} else if (*string != *pattern) {
+	    return 0;
+	}
+	string++;
+	pattern++;
+    }
+}
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 6e99f32..8a2aa94 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -10,7 +10,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- *  RCS: @(#) $Id: tclUtil.c,v 1.17 1999/12/12 02:26:43 hobbs Exp $
+ *  RCS: @(#) $Id: tclUtil.c,v 1.18 2000/05/08 21:59:59 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -1136,131 +1136,7 @@ Tcl_StringMatch(string, pattern)
     CONST char *pattern;	/* Pattern, which may contain special
 				 * characters. */
 {
-    int p, s;
-    CONST char *pstart = pattern;
-    
-    while (1) {
-	p = *pattern;
-	s = *string;
-	
-	/*
-	 * See if we're at the end of both the pattern and the string.  If
-	 * so, we succeeded.  If we're at the end of the pattern but not at
-	 * the end of the string, we failed.
-	 */
-	
-	if (p == '\0') {
-	    if (s == '\0') {
-		return 1;
-	    } else {
-		return 0;
-	    }
-	}
-	if ((s == '\0') && (p != '*')) {
-	    return 0;
-	}
-
-	/* Check for a "*" as the next pattern character.  It matches
-	 * any substring.  We handle this by calling ourselves
-	 * recursively for each postfix of string, until either we
-	 * match or we reach the end of the string.
-	 */
-	
-	if (p == '*') {
-	    pattern++;
-	    if (*pattern == '\0') {
-		return 1;
-	    }
-	    while (1) {
-		if (Tcl_StringMatch(string, pattern)) {
-		    return 1;
-		}
-		if (*string == '\0') {
-		    return 0;
-		}
-		string++;
-	    }
-	}
-
-	/* Check for a "?" as the next pattern character.  It matches
-	 * any single character.
-	 */
-
-	if (p == '?') {
-	    Tcl_UniChar ch;
-	    
-	    pattern++;
-	    string += Tcl_UtfToUniChar(string, &ch);
-	    continue;
-	}
-
-	/* Check for a "[" as the next pattern character.  It is followed
-	 * by a list of characters that are acceptable, or by a range
-	 * (two characters separated by "-").
-	 */
-	
-	if (p == '[') {
-	    Tcl_UniChar ch, startChar, endChar;
-
-	    pattern++;
-	    string += Tcl_UtfToUniChar(string, &ch);
-
-	    while (1) {
-		if ((*pattern == ']') || (*pattern == '\0')) {
-		    return 0;
-		}
-		pattern += Tcl_UtfToUniChar(pattern, &startChar);
-		if (*pattern == '-') {
-		    pattern++;
-		    if (*pattern == '\0') {
-			return 0;
-		    }
-		    pattern += Tcl_UtfToUniChar(pattern, &endChar);
-		    if (((startChar <= ch) && (ch <= endChar))
-			    || ((endChar <= ch) && (ch <= startChar))) {
-			/*
-			 * Matches ranges of form [a-z] or [z-a].
-			 */
-
-			break;
-		    }
-		} else if (startChar == ch) {
-		    break;
-		}
-	    }
-	    while (*pattern != ']') {
-		if (*pattern == '\0') {
-		    pattern = Tcl_UtfPrev(pattern, pstart);
-		    break;
-		}
-		pattern++;
-	    }
-	    pattern++;
-	    continue;
-	}
-    
-	/* If the next pattern character is '\', just strip off the '\'
-	 * so we do exact matching on the character that follows.
-	 */
-	
-	if (p == '\\') {
-	    pattern++;
-	    p = *pattern;
-	    if (p == '\0') {
-		return 0;
-	    }
-	}
-
-	/* There's no special character.  Just make sure that the next
-	 * bytes of each string match.
-	 */
-	
-	if (s != p) {
-	    return 0;
-	}
-	pattern++;
-	string++;
-    }
+    return Tcl_StringCaseMatch(string, pattern, 0);
 }
 
 /*
@@ -1290,13 +1166,12 @@ Tcl_StringCaseMatch(string, pattern, nocase)
 				 * characters. */
     int nocase;			/* 0 for case sensitive, 1 for insensitive */
 {
-    int p, s;
+    int p;
     CONST char *pstart = pattern;
     Tcl_UniChar ch1, ch2;
     
     while (1) {
 	p = *pattern;
-	s = *string;
 	
 	/*
 	 * See if we're at the end of both the pattern and the string.  If
@@ -1305,35 +1180,61 @@ Tcl_StringCaseMatch(string, pattern, nocase)
 	 */
 	
 	if (p == '\0') {
-	    return (s == '\0');
+	    return (*string == '\0');
 	}
-	if ((s == '\0') && (p != '*')) {
+	if ((*string == '\0') && (p != '*')) {
 	    return 0;
 	}
 
-	/* Check for a "*" as the next pattern character.  It matches
+	/*
+	 * Check for a "*" as the next pattern character.  It matches
 	 * any substring.  We handle this by calling ourselves
 	 * recursively for each postfix of string, until either we
 	 * match or we reach the end of the string.
 	 */
 	
 	if (p == '*') {
-	    pattern++;
-	    if (*pattern == '\0') {
+	    /*
+	     * Skip all successive *'s in the pattern
+	     */
+	    while (*(++pattern) == '*') {}
+	    p = *pattern;
+	    if (p == '\0') {
 		return 1;
 	    }
 	    while (1) {
+		/*
+		 * Optimization for matching - cruise through the string
+		 * quickly if the next char in the pattern isn't a special
+		 * character
+		 */
+		if ((p != '[') && (p != '?') && (p != '\\')) {
+		    if (nocase) {
+			while (*string && (p != *string)) {
+			    ch2 = Tcl_UtfToUniChar(string, &ch1);
+			    if (p == Tcl_UniCharToLower(ch1)) {
+				break;
+			    }
+			    string += ch2;
+			}
+		    } else {
+			while (*string && (p != *string)) {
+			    string += Tcl_UtfToUniChar(string, &ch1);
+			}
+		    }
+		}
 		if (Tcl_StringCaseMatch(string, pattern, nocase)) {
 		    return 1;
 		}
 		if (*string == '\0') {
 		    return 0;
 		}
-		string++;
+		string += Tcl_UtfToUniChar(string, &ch1);
 	    }
 	}
 
-	/* Check for a "?" as the next pattern character.  It matches
+	/*
+	 * Check for a "?" as the next pattern character.  It matches
 	 * any single character.
 	 */
 
@@ -1343,11 +1244,12 @@ Tcl_StringCaseMatch(string, pattern, nocase)
 	    continue;
 	}
 
-	/* Check for a "[" as the next pattern character.  It is followed
+	/*
+	 * Check for a "[" as the next pattern character.  It is followed
 	 * by a list of characters that are acceptable, or by a range
 	 * (two characters separated by "-").
 	 */
-	
+
 	if (p == '[') {
 	    Tcl_UniChar startChar, endChar;
 
@@ -1396,22 +1298,23 @@ Tcl_StringCaseMatch(string, pattern, nocase)
 	    continue;
 	}
     
-	/* If the next pattern character is '\', just strip off the '\'
+	/*
+	 * If the next pattern character is '\', just strip off the '\'
 	 * so we do exact matching on the character that follows.
 	 */
-	
+
 	if (p == '\\') {
 	    pattern++;
-	    p = *pattern;
-	    if (p == '\0') {
+	    if (*pattern == '\0') {
 		return 0;
 	    }
 	}
 
-	/* There's no special character.  Just make sure that the next
+	/*
+	 * There's no special character.  Just make sure that the next
 	 * bytes of each string match.
 	 */
-	
+
 	string  += Tcl_UtfToUniChar(string, &ch1);
 	pattern += Tcl_UtfToUniChar(pattern, &ch2);
 	if (nocase) {
author	hobbs <hobbs>	2000-05-08 21:59:58 (GMT)
committer	hobbs <hobbs>	2000-05-08 21:59:58 (GMT)
commit	09f4c1de476f86324d54f2e8c31a66870ce1c8bc (patch)
tree	025da577bdce141098365ffb242ca0ae0be52104 /generic
parent	63adaf2eb6d8949c310ea3f93c699ed6dd1c8839 (diff)
download	tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.zip tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.tar.gz tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.tar.bz2