* generic/tclExecute.c (TclExecuteByteCode INST_STR_MATCH):

* generic/tclCmdMZ.c (Tcl_StringObjCmd STR_MATCH): * generic/tclUtf.c (TclUniCharMatch): * generic/tclInt.decls: add private TclUniCharMatch function that * generic/tclIntDecls.h: does string match on counted unicode * generic/tclStubInit.c: strings. Tcl_UniCharCaseMatch has the * tests/string.test: failing that it can't handle strings or * tests/stringComp.test: patterns with embedded NULLs. Added tests that actually try strings/pats with NULLs. TclUniCharMatch should be TIPed and made public in the next minor version rev.
author: hobbs <hobbs> 2003-02-18 02:25:41 (GMT)
committer: hobbs <hobbs> 2003-02-18 02:25:41 (GMT)
commit: 4ab5c4158044ba81cf3aa93c71d446fddd9c7ca5 (patch)
tree: 4897dca0fb72c73e8c8bee4e5d6b292195b07662 /generic
parent: d86b1af8bec78fdbcc8bf65bc205fd287e19fd5d (diff)
download: tcl-4ab5c4158044ba81cf3aa93c71d446fddd9c7ca5.zip
tcl-4ab5c4158044ba81cf3aa93c71d446fddd9c7ca5.tar.gz
tcl-4ab5c4158044ba81cf3aa93c71d446fddd9c7ca5.tar.bz2
6 files changed, 229 insertions, 13 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index d3deaae..2339965 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -14,7 +14,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.80 2003/01/17 14:19:44 vincentdarley Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.81 2003/02/18 02:25:43 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -2008,6 +2008,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    break;
 	}
 	case STR_MATCH: {
+	    Tcl_UniChar *ustring1, *ustring2;
 	    int nocase = 0;
 
 	    if (objc < 4 || objc > 5) {
@@ -2027,10 +2028,10 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		    return TCL_ERROR;
 		}
 	    }
-
-	    Tcl_SetBooleanObj(resultPtr,
-		    Tcl_UniCharCaseMatch(Tcl_GetUnicode(objv[objc-1]),
-			    Tcl_GetUnicode(objv[objc-2]), nocase));
+	    ustring1 = Tcl_GetUnicodeFromObj(objv[objc-1], &length1);
+	    ustring2 = Tcl_GetUnicodeFromObj(objv[objc-2], &length2);
+	    Tcl_SetBooleanObj(resultPtr, TclUniCharMatch(ustring1, length1,
+		    ustring2, length2, nocase));
 	    break;
 	}
 	case STR_RANGE: {
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 93bf3a9..3333959 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -11,7 +11,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclExecute.c,v 1.92 2003/02/06 22:44:57 mdejong Exp $
+ * RCS: @(#) $Id: tclExecute.c,v 1.93 2003/02/18 02:25:44 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -2650,10 +2650,16 @@ TclExecuteByteCode(interp, codePtr)
 	 * Check that at least one of the objects is Unicode before
 	 * promoting both.
 	 */
+
 	if ((valuePtr->typePtr == &tclStringType)
 	        || (value2Ptr->typePtr == &tclStringType)) {
-	    match = Tcl_UniCharCaseMatch(Tcl_GetUnicode(valuePtr),
-	            Tcl_GetUnicode(value2Ptr), nocase);
+	    Tcl_UniChar *ustring1, *ustring2;
+	    int length1, length2;
+
+	    ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length1);
+	    ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2);
+	    match = TclUniCharMatch(ustring1, length1, ustring2, length2,
+		    nocase);
 	} else {
 	    match = Tcl_StringCaseMatch(TclGetString(valuePtr),
 		    TclGetString(value2Ptr), nocase);
diff --git a/generic/tclInt.decls b/generic/tclInt.decls
index f2b2f99..8a68f70 100644
--- a/generic/tclInt.decls
+++ b/generic/tclInt.decls
@@ -12,7 +12,7 @@
 # See the file "license.terms" for information on usage and redistribution
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 # 
-# RCS: @(#) $Id: tclInt.decls,v 1.58 2002/12/06 23:22:59 hobbs Exp $
+# RCS: @(#) $Id: tclInt.decls,v 1.59 2003/02/18 02:25:45 hobbs Exp $
 
 library tcl
 
@@ -690,6 +690,13 @@ declare 172 generic {
     int TclInThreadExit(void)
 }
 
+# added for 8.4.2
+
+declare 173 generic {
+    int TclUniCharMatch (CONST Tcl_UniChar *string, int strLen, \
+	    CONST Tcl_UniChar *pattern, int ptnLen, int nocase)
+}
+
 ##############################################################################
 
 # Define the platform specific internal Tcl interface. These functions are
diff --git a/generic/tclIntDecls.h b/generic/tclIntDecls.h
index 5c23144..e41dca6 100644
--- a/generic/tclIntDecls.h
+++ b/generic/tclIntDecls.h
@@ -11,7 +11,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclIntDecls.h,v 1.48 2002/11/07 02:13:36 mdejong Exp $
+ * RCS: @(#) $Id: tclIntDecls.h,v 1.49 2003/02/18 02:25:45 hobbs Exp $
  */
 
 #ifndef _TCLINTDECLS
@@ -498,6 +498,11 @@ EXTERN int		TclCheckExecutionTraces _ANSI_ARGS_((
 				Tcl_Obj *CONST objv[]));
 /* 172 */
 EXTERN int		TclInThreadExit _ANSI_ARGS_((void));
+/* 173 */
+EXTERN int		TclUniCharMatch _ANSI_ARGS_((
+				CONST Tcl_UniChar * string, int strLen, 
+				CONST Tcl_UniChar * pattern, int ptnLen, 
+				int nocase));
 
 typedef struct TclIntStubs {
     int magic;
@@ -700,6 +705,7 @@ typedef struct TclIntStubs {
     int (*tclCheckInterpTraces) _ANSI_ARGS_((Tcl_Interp * interp, CONST char * command, int numChars, Command * cmdPtr, int result, int traceFlags, int objc, Tcl_Obj *CONST objv[])); /* 170 */
     int (*tclCheckExecutionTraces) _ANSI_ARGS_((Tcl_Interp * interp, CONST char * command, int numChars, Command * cmdPtr, int result, int traceFlags, int objc, Tcl_Obj *CONST objv[])); /* 171 */
     int (*tclInThreadExit) _ANSI_ARGS_((void)); /* 172 */
+    int (*tclUniCharMatch) _ANSI_ARGS_((CONST Tcl_UniChar * string, int strLen, CONST Tcl_UniChar * pattern, int ptnLen, int nocase)); /* 173 */
 } TclIntStubs;
 
 #ifdef __cplusplus
@@ -1306,6 +1312,10 @@ extern TclIntStubs *tclIntStubsPtr;
 #define TclInThreadExit \
 	(tclIntStubsPtr->tclInThreadExit) /* 172 */
 #endif
+#ifndef TclUniCharMatch
+#define TclUniCharMatch \
+	(tclIntStubsPtr->tclUniCharMatch) /* 173 */
+#endif
 
 #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
 
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index c25f053..f303e0a 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclStubInit.c,v 1.78 2002/12/06 23:22:59 hobbs Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.79 2003/02/18 02:25:45 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -244,6 +244,7 @@ TclIntStubs tclIntStubs = {
     TclCheckInterpTraces, /* 170 */
     TclCheckExecutionTraces, /* 171 */
     TclInThreadExit, /* 172 */
+    TclUniCharMatch, /* 173 */
 };
 
 TclIntPlatStubs tclIntPlatStubs = {
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 72a23ca..6c6835c 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclUtf.c,v 1.29 2002/11/12 02:26:29 hobbs Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.30 2003/02/18 02:25:45 hobbs Exp $
  */
 
 #include "tclInt.h"
@@ -1584,7 +1584,10 @@ Tcl_UniCharIsWordChar(ch)
  *
  *	See if a particular Unicode string matches a particular pattern.
  *	Allows case insensitivity.  This is the Unicode equivalent of
- *	the char* Tcl_StringCaseMatch.
+ *	the char* Tcl_StringCaseMatch.  The UniChar strings must be
+ *	NULL-terminated.  This has no provision for counted UniChar
+ *	strings, thus should not be used where NULLs are expected in the
+ *	UniChar string.  Use TclUniCharMatch where possible.
  *
  * Results:
  *	The return value is 1 if string matches pattern, and
@@ -1755,3 +1758,191 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)
 	pattern++;
     }
 }
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclUniCharMatch --
+ *
+ *	See if a particular Unicode string matches a particular pattern.
+ *	Allows case insensitivity.  This is the Unicode equivalent of the
+ *	char* Tcl_StringCaseMatch.  This variant of Tcl_UniCharCaseMatch
+ *	uses counted Strings, so embedded NULLs are allowed.
+ *
+ * Results:
+ *	The return value is 1 if string matches pattern, and
+ *	0 otherwise.  The matching operation permits the following
+ *	special characters in the pattern: *?\[] (see the manual
+ *	entry for details on what these mean).
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
+    CONST Tcl_UniChar *string;	/* Unicode String. */
+    int strLen;			/* length of String */
+    CONST Tcl_UniChar *pattern;	/* Pattern, which may contain special
+				 * characters. */
+    int ptnLen;			/* length of Pattern */
+    int nocase;			/* 0 for case sensitive, 1 for insensitive */
+{
+    CONST Tcl_UniChar *stringEnd, *patternEnd;
+    Tcl_UniChar p;
+
+    stringEnd  = string + strLen;
+    patternEnd = pattern + ptnLen;
+
+    while (1) {
+	/*
+	 * See if we're at the end of both the pattern and the string.  If
+	 * so, we succeeded.  If we're at the end of the pattern but not at
+	 * the end of the string, we failed.
+	 */
+
+	if (pattern == patternEnd) {
+	    return (string == stringEnd);
+	}
+	p = *pattern;
+	if ((string == stringEnd) && (p != '*')) {
+	    return 0;
+	}
+
+	/*
+	 * Check for a "*" as the next pattern character.  It matches any
+	 * substring.  We handle this by skipping all the characters up to the
+	 * next matching one in the pattern, and then calling ourselves
+	 * recursively for each postfix of string, until either we match or we
+	 * reach the end of the string.
+	 */
+	
+	if (p == '*') {
+	    /*
+	     * Skip all successive *'s in the pattern
+	     */
+	    while (*(++pattern) == '*') {}
+	    if (pattern == patternEnd) {
+		return 1;
+	    }
+	    p = *pattern;
+	    if (nocase) {
+		p = Tcl_UniCharToLower(p);
+	    }
+	    while (1) {
+		/*
+		 * Optimization for matching - cruise through the string
+		 * quickly if the next char in the pattern isn't a special
+		 * character
+		 */
+		if ((p != '[') && (p != '?') && (p != '\\')) {
+		    if (nocase) {
+			while ((string < stringEnd) && (p != *string)
+				&& (p != Tcl_UniCharToLower(*string))) {
+			    string++;
+			}
+		    } else {
+			while ((string < stringEnd) && (p != *string)) {
+			    string++;
+			}
+		    }
+		}
+		if (TclUniCharMatch(string, stringEnd - string,
+			pattern, patternEnd - pattern, nocase)) {
+		    return 1;
+		}
+		if (string == stringEnd) {
+		    return 0;
+		}
+		string++;
+	    }
+	}
+
+	/*
+	 * Check for a "?" as the next pattern character.  It matches
+	 * any single character.
+	 */
+
+	if (p == '?') {
+	    pattern++;
+	    string++;
+	    continue;
+	}
+
+	/*
+	 * Check for a "[" as the next pattern character.  It is followed
+	 * by a list of characters that are acceptable, or by a range
+	 * (two characters separated by "-").
+	 */
+	
+	if (p == '[') {
+	    Tcl_UniChar ch1, startChar, endChar;
+
+	    pattern++;
+	    ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
+	    string++;
+	    while (1) {
+		if ((*pattern == ']') || (pattern == patternEnd)) {
+		    return 0;
+		}
+		startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
+		pattern++;
+		if (*pattern == '-') {
+		    pattern++;
+		    if (pattern == patternEnd) {
+			return 0;
+		    }
+		    endChar = (nocase ? Tcl_UniCharToLower(*pattern)
+			    : *pattern);
+		    pattern++;
+		    if (((startChar <= ch1) && (ch1 <= endChar))
+			    || ((endChar <= ch1) && (ch1 <= startChar))) {
+			/*
+			 * Matches ranges of form [a-z] or [z-a].
+			 */
+			break;
+		    }
+		} else if (startChar == ch1) {
+		    break;
+		}
+	    }
+	    while (*pattern != ']') {
+		if (pattern == patternEnd) {
+		    pattern--;
+		    break;
+		}
+		pattern++;
+	    }
+	    pattern++;
+	    continue;
+	}
+
+	/*
+	 * If the next pattern character is '\', just strip off the '\'
+	 * so we do exact matching on the character that follows.
+	 */
+
+	if (p == '\\') {
+	    if (++pattern == patternEnd) {
+		return 0;
+	    }
+	}
+
+	/*
+	 * There's no special character.  Just make sure that the next
+	 * bytes of each string match.
+	 */
+
+	if (nocase) {
+	    if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {
+		return 0;
+	    }
+	} else if (*string != *pattern) {
+	    return 0;
+	}
+	string++;
+	pattern++;
+    }
+}
author	hobbs <hobbs>	2003-02-18 02:25:41 (GMT)
committer	hobbs <hobbs>	2003-02-18 02:25:41 (GMT)
commit	4ab5c4158044ba81cf3aa93c71d446fddd9c7ca5 (patch)
tree	4897dca0fb72c73e8c8bee4e5d6b292195b07662 /generic
parent	d86b1af8bec78fdbcc8bf65bc205fd287e19fd5d (diff)
download	tcl-4ab5c4158044ba81cf3aa93c71d446fddd9c7ca5.zip tcl-4ab5c4158044ba81cf3aa93c71d446fddd9c7ca5.tar.gz tcl-4ab5c4158044ba81cf3aa93c71d446fddd9c7ca5.tar.bz2