Merged changes from scriptics-tclpro-1-3-b2 branch

author: stanton <stanton> 1999-05-22 01:20:10 (GMT)
committer: stanton <stanton> 1999-05-22 01:20:10 (GMT)
commit: ac39508cf97576cd9747c5630c4a13d794663b4a (patch)
tree: 4b7c61e6c670f227cf4d603907157fb6246d2d50 /generic
parent: 21bd132482f68735f5a4381934f56ee911904e87 (diff)
download: tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.zip
tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.tar.gz
tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.tar.bz2
9 files changed, 520 insertions, 58 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index d77b076..ae5d445 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -10,7 +10,7 @@
 # See the file "license.terms" for information on usage and redistribution
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 # 
-# RCS: @(#) $Id: tcl.decls,v 1.12 1999/05/07 20:07:46 stanton Exp $
+# RCS: @(#) $Id: tcl.decls,v 1.13 1999/05/22 01:20:11 stanton Exp $
 
 library tcl
 
@@ -1262,6 +1262,21 @@ declare 369 generic {
 declare 370 generic {
     int Tcl_UtfNcasecmp(CONST char *s1, CONST char *s2, size_t n)
 }
+declare 371 generic {
+    int Tcl_StringCaseMatch(CONST char *str, CONST char *pattern, int nocase)
+}
+declare 372 generic {
+    int Tcl_UniCharIsControl(int ch)
+}
+declare 373 generic {
+    int Tcl_UniCharIsGraph(int ch)
+}
+declare 374 generic {
+    int Tcl_UniCharIsPrint(int ch)
+}
+declare 375 generic {
+    int Tcl_UniCharIsPunct(int ch)
+}
 
 ##############################################################################
 
diff --git a/generic/tcl.h b/generic/tcl.h
index 9524f87..829f0b7 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -12,13 +12,21 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tcl.h,v 1.43 1999/04/30 23:35:40 stanton Exp $
+ * RCS: @(#) $Id: tcl.h,v 1.44 1999/05/22 01:20:11 stanton Exp $
  */
 
 #ifndef _TCL
 #define _TCL
 
 /*
+ * For C++ compilers, use extern "C"
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+    
+/*
  * The following defines are used to indicate the various release levels.
  */
 
@@ -1543,4 +1551,12 @@ EXTERN int		Tcl_AppInit _ANSI_ARGS_((Tcl_Interp *interp));
 #undef TCL_STORAGE_CLASS
 #define TCL_STORAGE_CLASS DLLIMPORT
 
+/*
+ * end block for C++
+ */
+    
+#ifdef __cplusplus
+}
+#endif
+    
 #endif /* _TCL */
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index 8aa6880..d59dfeb 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -11,7 +11,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdAH.c,v 1.5 1999/04/16 00:46:43 stanton Exp $
+ * RCS: @(#) $Id: tclCmdAH.c,v 1.6 1999/05/22 01:20:12 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -1517,7 +1517,7 @@ GetTypeFromMode(mode)
 /*
  *----------------------------------------------------------------------
  *
- * Tcl_FoObjCmd --
+ * Tcl_ForObjCmd --
  *
  *      This procedure is invoked to process the "for" Tcl command.
  *      See the user documentation for details on what it does.
@@ -1559,6 +1559,13 @@ Tcl_ForObjCmd(dummy, interp, objc, objv)
         return result;
     }
     while (1) {
+	/*
+	 * We need to reset the result before passing it off to
+	 * Tcl_ExprBooleanObj.  Otherwise, any error message will be appended
+	 * to the result of the last evaluation.
+	 */
+
+	Tcl_ResetResult(interp);
         result = Tcl_ExprBooleanObj(interp, objv[2], &value);
         if (result != TCL_OK) {
             return result;
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index dc5607c..5488773 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.8 1999/05/06 22:50:03 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.9 1999/05/22 01:20:12 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -896,7 +896,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		}
 		if (nocase) {
 		    match = Tcl_UtfNcasecmp(string1, string2,
-			    (unsigned)length);
+			    (unsigned) length);
 		} else {
 		    match = Tcl_UtfNcmp(string1, string2,
 			    (unsigned) length);
@@ -912,7 +912,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    }
 
 	    if ((enum options) index == STR_EQUAL) {
-		Tcl_SetIntObj(resultPtr, (match) ? 0 : 1);
+		Tcl_SetBooleanObj(resultPtr, (match) ? 0 : 1);
 	    } else {
 		Tcl_SetIntObj(resultPtr, ((match > 0) ? 1 :
 					  (match < 0) ? -1 : 0));
@@ -921,21 +921,47 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	}
 	case STR_FIRST: {
 	    register char *p, *end;
-	    int match;
+	    int match, utflen, start;
 
-	    if (objc != 4) {
-	        badFirstLastArgs:
-	        Tcl_WrongNumArgs(interp, 2, objv, "string1 string2");
+	    if (objc < 4 || objc > 5) {
+	        Tcl_WrongNumArgs(interp, 2, objv,
+				 "string1 string2 ?startIndex?");
 		return TCL_ERROR;
 	    }
 
 	    /*
 	     * This algorithm fails on improperly formed UTF strings.
+	     * We are searching string2 for the sequence string1.
 	     */
 
 	    match = -1;
+	    start = 0;
+	    utflen = -1;
 	    string1 = Tcl_GetStringFromObj(objv[2], &length1);
 	    string2 = Tcl_GetStringFromObj(objv[3], &length2);
+
+	    if (objc == 5) {
+		/*
+		 * If a startIndex is specified, we will need to fast forward
+		 * to that point in the string before we think about a match
+		 */
+		utflen = Tcl_NumUtfChars(string2, length2);
+		if (TclGetIntForIndex(interp, objv[4], utflen-1,
+				      &start) != TCL_OK) {
+		    return TCL_ERROR;
+		}
+		if (start >= utflen) {
+		    goto str_first_done;
+		} else if (start > 0) {
+		    if (length2 == utflen) {
+			/* no unicode chars */
+			string2 += start;
+		    } else {
+			string2 = Tcl_UtfAtIndex(string2, start);
+		    }
+		}
+	    }
+
 	    if (length1 > 0) {
 		end = string2 + length2 - length1 + 1;
 		for (p = string2;  p < end;  p++) {
@@ -955,19 +981,25 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    }
 
 	    /*
-	     * Compute the character index of the matching string by counting
-	     * the number of characters before the match.
+	     * Compute the character index of the matching string by
+	     * counting the number of characters before the match.
 	     */
-
+	str_first_done:
 	    if (match != -1) {
-		match = Tcl_NumUtfChars(string2, match);
+		if (objc == 4) {
+		    match = Tcl_NumUtfChars(string2, match);
+		} else if (length2 == utflen) {
+		    /* no unicode chars */
+		    match += start;
+		} else {
+		    match = start + Tcl_NumUtfChars(string2, match);
+		}
 	    }
 	    Tcl_SetIntObj(resultPtr, match);
 	    break;
 	}
 	case STR_INDEX: {
 	    int index;
-	    char buf[TCL_UTF_MAX];
 
 	    if (objc != 4) {
 	        Tcl_WrongNumArgs(interp, 2, objv, "string charIndex");
@@ -977,15 +1009,25 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    /*
 	     * establish what 'end' really means
 	     */
-	    length2 = Tcl_NumUtfChars(string1, length1) - 1;
-	    if (TclGetIntForIndex(interp, objv[3], length2,
+	    length2 = Tcl_NumUtfChars(string1, length1);
+	    if (TclGetIntForIndex(interp, objv[3], length2 - 1,
 				  &index) != TCL_OK) {
 	      return TCL_ERROR;
 	    }
-	    if ((index >= 0) && (index < length1)) {
-		length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1,
-							      index), buf);
-		Tcl_SetStringObj(resultPtr, buf, length2);
+	    /*
+	     * index must be between 0 and the UTF length to be valid
+	     */
+	    if ((index >= 0) && (index < length2)) {
+		if (length1 == length2) {
+		    /* no unicode chars */
+		    Tcl_SetStringObj(resultPtr, string1+index, 1);
+		} else {
+		    char buf[TCL_UTF_MAX];
+
+		    length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1,
+								  index), buf);
+		    Tcl_SetStringObj(resultPtr, buf, length2);
+		}
 	    }
 	    break;
 	}
@@ -997,18 +1039,18 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    Tcl_Obj *objPtr, *failVarObj = NULL;
 
 	    static char *isOptions[] = {
-		"alnum",	"alpha",	"ascii",
-		"boolean",	"digit",	"double",
-		"false",	"integer",	"lower",
-		"space",	"true",		"upper",
-		"wordchar",	(char *) NULL
+		"alnum",	"alpha",	"ascii",	"control",
+		"boolean",	"digit",	"double",	"false",
+		"graph",	"integer",	"lower",	"print",
+		"punct",	"space",	"true",		"upper",
+		"wordchar",	"xdigit",	(char *) NULL
 	    };
 	    enum isOptions {
-		STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,
-		STR_IS_BOOL,	STR_IS_DIGIT,	STR_IS_DOUBLE,
-		STR_IS_FALSE,	STR_IS_INT,	STR_IS_LOWER,
-		STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,
-		STR_IS_WORD
+		STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,	STR_IS_CONTROL,
+		STR_IS_BOOL,	STR_IS_DIGIT,	STR_IS_DOUBLE,	STR_IS_FALSE,
+		STR_IS_GRAPH,	STR_IS_INT,	STR_IS_LOWER,	STR_IS_PRINT,
+		STR_IS_PUNCT,	STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,
+		STR_IS_WORD,	STR_IS_XDIGIT
 	    };
 
 	    if (objc < 4 || objc > 7) {
@@ -1101,6 +1143,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 			result = 0;
 		    }
 		    break;
+		case STR_IS_CONTROL:
+		    chcomp = Tcl_UniCharIsControl;
+		    break;
 		case STR_IS_DIGIT:
 		    chcomp = Tcl_UniCharIsDigit;
 		    break;
@@ -1162,6 +1207,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		    }
 		    break;
 		}
+		case STR_IS_GRAPH:
+		    chcomp = Tcl_UniCharIsGraph;
+		    break;
 		case STR_IS_INT: {
 		    char *stop;
 
@@ -1170,7 +1218,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 			break;
 		    }
 		    /*
-		     * Like STR_IS_DOUBLE, but we don't use strtoul.
+		     * Like STR_IS_DOUBLE, but we use strtoul.
 		     * Since Tcl_GetInt already failed, we set result to 0.
 		     */
 		    result = 0;
@@ -1204,6 +1252,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		case STR_IS_LOWER:
 		    chcomp = Tcl_UniCharIsLower;
 		    break;
+		case STR_IS_PRINT:
+		    chcomp = Tcl_UniCharIsPrint;
+		    break;
+		case STR_IS_PUNCT:
+		    chcomp = Tcl_UniCharIsPunct;
+		    break;
 		case STR_IS_SPACE:
 		    chcomp = Tcl_UniCharIsSpace;
 		    break;
@@ -1213,6 +1267,17 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		case STR_IS_WORD:
 		    chcomp = Tcl_UniCharIsWordChar;
 		    break;
+		case STR_IS_XDIGIT: {
+		    for (; string1 < end; string1++, failat++) {
+			/* INTL: We assume unicode is bad for this class */
+			if ((*((unsigned char *)string1) >= 0xC0) ||
+			    !isxdigit(*(unsigned char *)string1)) {
+			    result = 0;
+			    break;
+			}
+		    }
+		    break;
+		}
 	    }
 	    if (chcomp != NULL) {
 		for (; string1 < end; string1 += length2, failat++) {
@@ -1238,10 +1303,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	}
 	case STR_LAST: {
 	    register char *p;
-	    int match;
+	    int match, utflen, start;
 
-	    if (objc != 4) {
-	        goto badFirstLastArgs;
+	    if (objc < 4 || objc > 5) {
+	        Tcl_WrongNumArgs(interp, 2, objv,
+				 "string1 string2 ?startIndex?");
+		return TCL_ERROR;
 	    }
 
 	    /*
@@ -1249,14 +1316,43 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	     */
 
 	    match = -1;
+	    start = 0;
+	    utflen = -1;
 	    string1 = Tcl_GetStringFromObj(objv[2], &length1);
 	    string2 = Tcl_GetStringFromObj(objv[3], &length2);
+
+	    if (objc == 5) {
+		/*
+		 * If a startIndex is specified, we will need to restrict
+		 * the string range to that char index in the string
+		 */
+		utflen = Tcl_NumUtfChars(string2, length2);
+		if (TclGetIntForIndex(interp, objv[4], utflen-1,
+				      &start) != TCL_OK) {
+		    return TCL_ERROR;
+		}
+		if (start < 0) {
+		    goto str_last_done;
+		} else if (start < utflen) {
+		    if (length2 == utflen) {
+			/* no unicode chars */
+			p = string2 + start + 1 - length1;
+		    } else {
+			p = Tcl_UtfAtIndex(string2, start+1) - length1;
+		    }
+		} else {
+		    p = string2 + length2 - length1;
+		}
+	    } else {
+		p = string2 + length2 - length1;
+	    }
+
 	    if (length1 > 0) {
-		for (p = string2 + length2 - length1;  p >= string2;  p--) {
+		for (;  p >= string2;  p--) {
 		    /*
 		     * Scan backwards to find the first character.
 		     */
-		    
+
 		    while ((p != string2) && (*p != *string1)) {
 			p--;
 		    }
@@ -1271,9 +1367,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	     * Compute the character index of the matching string by counting
 	     * the number of characters before the match.
 	     */
-
+	str_last_done:
 	    if (match != -1) {
-		match = Tcl_NumUtfChars(string2, match);
+		if ((objc == 4) || (length2 != utflen)) {
+		    /* only check when we've got unicode chars */
+		    match = Tcl_NumUtfChars(string2, match);
+		}
 	    }
 	    Tcl_SetIntObj(resultPtr, match);
 	    break;
@@ -1408,14 +1507,30 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    break;
 	}
 	case STR_MATCH: {
-	    if (objc != 4) {
-	        Tcl_WrongNumArgs(interp, 2, objv, "pattern string");
+	    int nocase = 0;
+
+	    if (objc < 4 || objc > 5) {
+	        Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? pattern string");
 		return TCL_ERROR;
 	    }
 
-	    string1 = Tcl_GetStringFromObj(objv[2], &length1);
-	    string2 = Tcl_GetStringFromObj(objv[3], &length2);
-	    Tcl_SetBooleanObj(resultPtr, Tcl_StringMatch(string2, string1));
+	    if (objc == 5) {
+		string2 = Tcl_GetStringFromObj(objv[2], &length2);
+		if ((length2 > 1) &&
+		    strncmp(string2, "-nocase", (size_t) length2) == 0) {
+		    nocase = 1;
+		} else {
+		    Tcl_AppendStringsToObj(resultPtr, "bad option \"",
+					   string2, "\": must be -nocase",
+					   (char *) NULL);
+		    return TCL_ERROR;
+		}
+	    }
+
+	    Tcl_SetBooleanObj(resultPtr,
+			      Tcl_StringCaseMatch(Tcl_GetString(objv[objc-1]),
+						  Tcl_GetString(objv[objc-2]),
+						  nocase));
 	    break;
 	}
 	case STR_RANGE: {
@@ -1427,20 +1542,20 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	    }
 
 	    string1 = Tcl_GetStringFromObj(objv[2], &length1);
-	    length1 = Tcl_NumUtfChars(string1, length1);
-	    if (TclGetIntForIndex(interp, objv[3], length1 - 1,
+	    length1 = Tcl_NumUtfChars(string1, length1) - 1;
+	    if (TclGetIntForIndex(interp, objv[3], length1,
 		    &first) != TCL_OK) {
 		return TCL_ERROR;
 	    }
-	    if (TclGetIntForIndex(interp, objv[4], length1 - 1,
+	    if (TclGetIntForIndex(interp, objv[4], length1,
 		    &last) != TCL_OK) {
 		return TCL_ERROR;
 	    }
 	    if (first < 0) {
 		first = 0;
 	    }
-	    if (last >= length1 - 1) {
-		last = length1 - 1;
+	    if (last >= length1) {
+		last = length1;
 	    }
 	    if (last >= first) {
 		char *start, *end;
@@ -1474,8 +1589,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	case STR_REPLACE: {
 	    int first, last;
 
-	    if (!(objc == 5 || objc == 6)) {
-	        Tcl_WrongNumArgs(interp, 2, objv, "string first last ?string?");
+	    if (objc < 5 || objc > 6) {
+	        Tcl_WrongNumArgs(interp, 2, objv,
+				 "string first last ?string?");
 		return TCL_ERROR;
 	    }
 
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index ffb080d..a9177a6 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclDecls.h,v 1.12 1999/05/07 20:07:47 stanton Exp $
+ * RCS: @(#) $Id: tclDecls.h,v 1.13 1999/05/22 01:20:12 stanton Exp $
  */
 
 #ifndef _TCLDECLS
@@ -1129,6 +1129,17 @@ EXTERN int		Tcl_UtfNcmp _ANSI_ARGS_((CONST char * s1,
 /* 370 */
 EXTERN int		Tcl_UtfNcasecmp _ANSI_ARGS_((CONST char * s1, 
 				CONST char * s2, size_t n));
+/* 371 */
+EXTERN int		Tcl_StringCaseMatch _ANSI_ARGS_((CONST char * str, 
+				CONST char * pattern, int nocase));
+/* 372 */
+EXTERN int		Tcl_UniCharIsControl _ANSI_ARGS_((int ch));
+/* 373 */
+EXTERN int		Tcl_UniCharIsGraph _ANSI_ARGS_((int ch));
+/* 374 */
+EXTERN int		Tcl_UniCharIsPrint _ANSI_ARGS_((int ch));
+/* 375 */
+EXTERN int		Tcl_UniCharIsPunct _ANSI_ARGS_((int ch));
 
 typedef struct TclStubHooks {
     struct TclPlatStubs *tclPlatStubs;
@@ -1535,6 +1546,11 @@ typedef struct TclStubs {
     int (*tcl_Stat) _ANSI_ARGS_((CONST char * path, struct stat * bufPtr)); /* 368 */
     int (*tcl_UtfNcmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 369 */
     int (*tcl_UtfNcasecmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 370 */
+    int (*tcl_StringCaseMatch) _ANSI_ARGS_((CONST char * str, CONST char * pattern, int nocase)); /* 371 */
+    int (*tcl_UniCharIsControl) _ANSI_ARGS_((int ch)); /* 372 */
+    int (*tcl_UniCharIsGraph) _ANSI_ARGS_((int ch)); /* 373 */
+    int (*tcl_UniCharIsPrint) _ANSI_ARGS_((int ch)); /* 374 */
+    int (*tcl_UniCharIsPunct) _ANSI_ARGS_((int ch)); /* 375 */
 } TclStubs;
 
 #ifdef __cplusplus
@@ -3023,6 +3039,26 @@ extern TclStubs *tclStubsPtr;
 #define Tcl_UtfNcasecmp \
 	(tclStubsPtr->tcl_UtfNcasecmp) /* 370 */
 #endif
+#ifndef Tcl_StringCaseMatch
+#define Tcl_StringCaseMatch \
+	(tclStubsPtr->tcl_StringCaseMatch) /* 371 */
+#endif
+#ifndef Tcl_UniCharIsControl
+#define Tcl_UniCharIsControl \
+	(tclStubsPtr->tcl_UniCharIsControl) /* 372 */
+#endif
+#ifndef Tcl_UniCharIsGraph
+#define Tcl_UniCharIsGraph \
+	(tclStubsPtr->tcl_UniCharIsGraph) /* 373 */
+#endif
+#ifndef Tcl_UniCharIsPrint
+#define Tcl_UniCharIsPrint \
+	(tclStubsPtr->tcl_UniCharIsPrint) /* 374 */
+#endif
+#ifndef Tcl_UniCharIsPunct
+#define Tcl_UniCharIsPunct \
+	(tclStubsPtr->tcl_UniCharIsPunct) /* 375 */
+#endif
 
 #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
 
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 3ae0c16..22e1db0 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclRegexp.c,v 1.4 1999/05/14 02:04:05 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.5 1999/05/22 01:20:13 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -751,7 +751,7 @@ CompileRegexp(interp, string, length, flags)
      * a regexp if it has the same pattern and the same flags.
      */
 
-    for (i = 0; i < NUM_REGEXPS; i++) {
+    for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
 	if ((length == tsdPtr->patLengths[i])
 		&& (tsdPtr->regexps[i]->flags == flags)
 		&& (strcmp(string, tsdPtr->patterns[i]) == 0)) {
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 632f4a8..1638f8d 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclStubInit.c,v 1.13 1999/05/06 18:46:25 stanton Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.14 1999/05/22 01:20:13 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -686,6 +686,11 @@ TclStubs tclStubs = {
     Tcl_Stat, /* 368 */
     Tcl_UtfNcmp, /* 369 */
     Tcl_UtfNcasecmp, /* 370 */
+    Tcl_StringCaseMatch, /* 371 */
+    Tcl_UniCharIsControl, /* 372 */
+    Tcl_UniCharIsGraph, /* 373 */
+    Tcl_UniCharIsPrint, /* 374 */
+    Tcl_UniCharIsPunct, /* 375 */
 };
 
 /* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 2361a2e..635ffbe 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclUtf.c,v 1.6 1999/05/20 23:40:34 hershey Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.7 1999/05/22 01:20:13 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -35,6 +35,16 @@
 
 #define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION)
 
+#define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | \
+	    (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \
+	    (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \
+	    (1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \
+	    (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
+	    (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
+	    (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \
+	    (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \
+	    (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL))
+
 /*
  * Unicode characters less than this value are represented by themselves 
  * in UTF-8 strings. 
@@ -1341,6 +1351,29 @@ Tcl_UniCharIsAlpha(ch)
 /*
  *----------------------------------------------------------------------
  *
+ * Tcl_UniCharIsControl --
+ *
+ *	Test if a character is a Unicode control character.
+ *
+ * Results:
+ *	Returns non-zero if character is a control.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsControl(ch)
+    int ch;			/* Unicode character to test. */
+{
+    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_UniCharIsDigit --
  *
  *	Test if a character is a numeric Unicode character.
@@ -1365,6 +1398,30 @@ Tcl_UniCharIsDigit(ch)
 /*
  *----------------------------------------------------------------------
  *
+ * Tcl_UniCharIsGraph --
+ *
+ *	Test if a character is any Unicode print character except space.
+ *
+ * Results:
+ *	Returns non-zero if character is printable, but not space.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsGraph(ch)
+    int ch;			/* Unicode character to test. */
+{
+    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+    return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' '));
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_UniCharIsLower --
  *
  *	Test if a character is a lowercase Unicode character.
@@ -1388,6 +1445,55 @@ Tcl_UniCharIsLower(ch)
 /*
  *----------------------------------------------------------------------
  *
+ * Tcl_UniCharIsPrint --
+ *
+ *	Test if a character is a Unicode print character.
+ *
+ * Results:
+ *	Returns non-zero if character is printable.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsPrint(ch)
+    int ch;			/* Unicode character to test. */
+{
+    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+    return ((PRINT_BITS >> category) & 1);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharIsPunct --
+ *
+ *	Test if for any printing char that is neither space or an alnum.
+ *
+ * Results:
+ *	Returns non-zero if character is punct.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsPunct(ch)
+    int ch;			/* Unicode character to test. */
+{
+    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+    return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' ')
+	    && !(((ALPHA_BITS | DIGIT_BITS) >> category) & 1));
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_UniCharIsSpace --
  *
  *	Test if a character is a whitespace Unicode character.
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index fa4c22f..d60e409 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -10,7 +10,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- *  RCS: @(#) $Id: tclUtil.c,v 1.11 1999/05/06 19:21:11 stanton Exp $
+ *  RCS: @(#) $Id: tclUtil.c,v 1.12 1999/05/22 01:20:13 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -1234,6 +1234,167 @@ Tcl_StringMatch(string, pattern)
 /*
  *----------------------------------------------------------------------
  *
+ * Tcl_StringCaseMatch --
+ *
+ *	See if a particular string matches a particular pattern.
+ *	Allows case insensitivity.
+ *
+ * Results:
+ *	The return value is 1 if string matches pattern, and
+ *	0 otherwise.  The matching operation permits the following
+ *	special characters in the pattern: *?\[] (see the manual
+ *	entry for details on what these mean).
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_StringCaseMatch(string, pattern, nocase)
+    CONST char *string;		/* String. */
+    CONST char *pattern;	/* Pattern, which may contain special
+				 * characters. */
+    int nocase;			/* 0 for case sensitive, 1 for insensitive */
+{
+    int p, s;
+    CONST char *pstart = pattern;
+    Tcl_UniChar ch1, ch2;
+    
+    while (1) {
+	p = *pattern;
+	s = *string;
+	
+	/*
+	 * See if we're at the end of both the pattern and the string.  If
+	 * so, we succeeded.  If we're at the end of the pattern but not at
+	 * the end of the string, we failed.
+	 */
+	
+	if (p == '\0') {
+	    return (s == '\0');
+	}
+	if ((s == '\0') && (p != '*')) {
+	    return 0;
+	}
+
+	/* Check for a "*" as the next pattern character.  It matches
+	 * any substring.  We handle this by calling ourselves
+	 * recursively for each postfix of string, until either we
+	 * match or we reach the end of the string.
+	 */
+	
+	if (p == '*') {
+	    pattern++;
+	    if (*pattern == '\0') {
+		return 1;
+	    }
+	    while (1) {
+		if (Tcl_StringCaseMatch(string, pattern, nocase)) {
+		    return 1;
+		}
+		if (*string == '\0') {
+		    return 0;
+		}
+		string++;
+	    }
+	}
+
+	/* Check for a "?" as the next pattern character.  It matches
+	 * any single character.
+	 */
+
+	if (p == '?') {
+	    pattern++;
+	    string += Tcl_UtfToUniChar(string, &ch1);
+	    continue;
+	}
+
+	/* Check for a "[" as the next pattern character.  It is followed
+	 * by a list of characters that are acceptable, or by a range
+	 * (two characters separated by "-").
+	 */
+	
+	if (p == '[') {
+	    Tcl_UniChar startChar, endChar;
+
+	    pattern++;
+	    string += Tcl_UtfToUniChar(string, &ch1);
+	    if (nocase) {
+		ch1 = Tcl_UniCharToLower(ch1);
+	    }
+	    while (1) {
+		if ((*pattern == ']') || (*pattern == '\0')) {
+		    return 0;
+		}
+		pattern += Tcl_UtfToUniChar(pattern, &startChar);
+		if (nocase) {
+		    startChar = Tcl_UniCharToLower(startChar);
+		}
+		if (*pattern == '-') {
+		    pattern++;
+		    if (*pattern == '\0') {
+			return 0;
+		    }
+		    pattern += Tcl_UtfToUniChar(pattern, &endChar);
+		    if (nocase) {
+			endChar = Tcl_UniCharToLower(endChar);
+		    }
+		    if (((startChar <= ch1) && (ch1 <= endChar))
+			    || ((endChar <= ch1) && (ch1 <= startChar))) {
+			/*
+			 * Matches ranges of form [a-z] or [z-a].
+			 */
+
+			break;
+		    }
+		} else if (startChar == ch1) {
+		    break;
+		}
+	    }
+	    while (*pattern != ']') {
+		if (*pattern == '\0') {
+		    pattern = Tcl_UtfPrev(pattern, pstart);
+		    break;
+		}
+		pattern++;
+	    }
+	    pattern++;
+	    continue;
+	}
+    
+	/* If the next pattern character is '\', just strip off the '\'
+	 * so we do exact matching on the character that follows.
+	 */
+	
+	if (p == '\\') {
+	    pattern++;
+	    p = *pattern;
+	    if (p == '\0') {
+		return 0;
+	    }
+	}
+
+	/* There's no special character.  Just make sure that the next
+	 * bytes of each string match.
+	 */
+	
+	string  += Tcl_UtfToUniChar(string, &ch1);
+	pattern += Tcl_UtfToUniChar(pattern, &ch2);
+	if (nocase) {
+	    if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
+		return 0;
+	    }
+	} else if (ch1 != ch2) {
+	    return 0;
+	}
+    }
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_DStringInit --
  *
  *	Initializes a dynamic string, discarding any previous contents
author	stanton <stanton>	1999-05-22 01:20:10 (GMT)
committer	stanton <stanton>	1999-05-22 01:20:10 (GMT)
commit	ac39508cf97576cd9747c5630c4a13d794663b4a (patch)
tree	4b7c61e6c670f227cf4d603907157fb6246d2d50 /generic
parent	21bd132482f68735f5a4381934f56ee911904e87 (diff)
download	tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.zip tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.tar.gz tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.tar.bz2