summary | refs | log | tree | commit | diff | stats
path: root/generic
diff options
context:
space:
mode:
Diffstat (limited to 'generic')
-rw-r--r-- generic/tcl.decls 17
-rw-r--r-- generic/tcl.h 18
-rw-r--r-- generic/tclCmdAH.c 11
-rw-r--r-- generic/tclCmdMZ.c 212
-rw-r--r-- generic/tclDecls.h 38
-rw-r--r-- generic/tclRegexp.c 4
-rw-r--r-- generic/tclStubInit.c 7
-rw-r--r-- generic/tclUtf.c 108
-rw-r--r-- generic/tclUtil.c 163
9 files changed, 520 insertions, 58 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index d77b076..ae5d445 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -10,7 +10,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: tcl.decls,v 1.12 1999/05/07 20:07:46 stanton Exp $
+# RCS: @(#) $Id: tcl.decls,v 1.13 1999/05/22 01:20:11 stanton Exp $
library tcl
@@ -1262,6 +1262,21 @@ declare 369 generic {
declare 370 generic {
int Tcl_UtfNcasecmp(CONST char *s1, CONST char *s2, size_t n)
}
+declare 371 generic {
+ int Tcl_StringCaseMatch(CONST char *str, CONST char *pattern, int nocase)
+}
+declare 372 generic {
+ int Tcl_UniCharIsControl(int ch)
+}
+declare 373 generic {
+ int Tcl_UniCharIsGraph(int ch)
+}
+declare 374 generic {
+ int Tcl_UniCharIsPrint(int ch)
+}
+declare 375 generic {
+ int Tcl_UniCharIsPunct(int ch)
+}
##############################################################################
diff --git a/generic/tcl.h b/generic/tcl.h
index 9524f87..829f0b7 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -12,13 +12,21 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tcl.h,v 1.43 1999/04/30 23:35:40 stanton Exp $
+ * RCS: @(#) $Id: tcl.h,v 1.44 1999/05/22 01:20:11 stanton Exp $
*/
#ifndef _TCL
#define _TCL
/*
+ * For C++ compilers, use extern "C"
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
* The following defines are used to indicate the various release levels.
*/
@@ -1543,4 +1551,12 @@ EXTERN int Tcl_AppInit _ANSI_ARGS_((Tcl_Interp *interp));
#undef TCL_STORAGE_CLASS
#define TCL_STORAGE_CLASS DLLIMPORT
+/*
+ * end block for C++
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
#endif /* _TCL */
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index 8aa6880..d59dfeb 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdAH.c,v 1.5 1999/04/16 00:46:43 stanton Exp $
+ * RCS: @(#) $Id: tclCmdAH.c,v 1.6 1999/05/22 01:20:12 stanton Exp $
*/
#include "tclInt.h"
@@ -1517,7 +1517,7 @@ GetTypeFromMode(mode)
/*
*----------------------------------------------------------------------
*
- * Tcl_FoObjCmd --
+ * Tcl_ForObjCmd --
*
* This procedure is invoked to process the "for" Tcl command.
* See the user documentation for details on what it does.
@@ -1559,6 +1559,13 @@ Tcl_ForObjCmd(dummy, interp, objc, objv)
return result;
}
while (1) {
+ /*
+ * We need to reset the result before passing it off to
+ * Tcl_ExprBooleanObj. Otherwise, any error message will be appended
+ * to the result of the last evaluation.
+ */
+
+ Tcl_ResetResult(interp);
result = Tcl_ExprBooleanObj(interp, objv[2], &value);
if (result != TCL_OK) {
return result;
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index dc5607c..5488773 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.8 1999/05/06 22:50:03 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.9 1999/05/22 01:20:12 stanton Exp $
*/
#include "tclInt.h"
@@ -896,7 +896,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
if (nocase) {
match = Tcl_UtfNcasecmp(string1, string2,
- (unsigned)length);
+ (unsigned) length);
} else {
match = Tcl_UtfNcmp(string1, string2,
(unsigned) length);
@@ -912,7 +912,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
if ((enum options) index == STR_EQUAL) {
- Tcl_SetIntObj(resultPtr, (match) ? 0 : 1);
+ Tcl_SetBooleanObj(resultPtr, (match) ? 0 : 1);
} else {
Tcl_SetIntObj(resultPtr, ((match > 0) ? 1 :
(match < 0) ? -1 : 0));
@@ -921,21 +921,47 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
case STR_FIRST: {
register char *p, *end;
- int match;
+ int match, utflen, start;
- if (objc != 4) {
- badFirstLastArgs:
- Tcl_WrongNumArgs(interp, 2, objv, "string1 string2");
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "string1 string2 ?startIndex?");
return TCL_ERROR;
}
/*
* This algorithm fails on improperly formed UTF strings.
+ * We are searching string2 for the sequence string1.
*/
match = -1;
+ start = 0;
+ utflen = -1;
string1 = Tcl_GetStringFromObj(objv[2], &length1);
string2 = Tcl_GetStringFromObj(objv[3], &length2);
+
+ if (objc == 5) {
+ /*
+ * If a startIndex is specified, we will need to fast forward
+ * to that point in the string before we think about a match
+ */
+ utflen = Tcl_NumUtfChars(string2, length2);
+ if (TclGetIntForIndex(interp, objv[4], utflen-1,
+ &start) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (start >= utflen) {
+ goto str_first_done;
+ } else if (start > 0) {
+ if (length2 == utflen) {
+ /* no unicode chars */
+ string2 += start;
+ } else {
+ string2 = Tcl_UtfAtIndex(string2, start);
+ }
+ }
+ }
+
if (length1 > 0) {
end = string2 + length2 - length1 + 1;
for (p = string2; p < end; p++) {
@@ -955,19 +981,25 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
/*
- * Compute the character index of the matching string by counting
- * the number of characters before the match.
+ * Compute the character index of the matching string by
+ * counting the number of characters before the match.
*/
-
+ str_first_done:
if (match != -1) {
- match = Tcl_NumUtfChars(string2, match);
+ if (objc == 4) {
+ match = Tcl_NumUtfChars(string2, match);
+ } else if (length2 == utflen) {
+ /* no unicode chars */
+ match += start;
+ } else {
+ match = start + Tcl_NumUtfChars(string2, match);
+ }
}
Tcl_SetIntObj(resultPtr, match);
break;
}
case STR_INDEX: {
int index;
- char buf[TCL_UTF_MAX];
if (objc != 4) {
Tcl_WrongNumArgs(interp, 2, objv, "string charIndex");
@@ -977,15 +1009,25 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
/*
* establish what 'end' really means
*/
- length2 = Tcl_NumUtfChars(string1, length1) - 1;
- if (TclGetIntForIndex(interp, objv[3], length2,
+ length2 = Tcl_NumUtfChars(string1, length1);
+ if (TclGetIntForIndex(interp, objv[3], length2 - 1,
&index) != TCL_OK) {
return TCL_ERROR;
}
- if ((index >= 0) && (index < length1)) {
- length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1,
- index), buf);
- Tcl_SetStringObj(resultPtr, buf, length2);
+ /*
+ * index must be between 0 and the UTF length to be valid
+ */
+ if ((index >= 0) && (index < length2)) {
+ if (length1 == length2) {
+ /* no unicode chars */
+ Tcl_SetStringObj(resultPtr, string1+index, 1);
+ } else {
+ char buf[TCL_UTF_MAX];
+
+ length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1,
+ index), buf);
+ Tcl_SetStringObj(resultPtr, buf, length2);
+ }
}
break;
}
@@ -997,18 +1039,18 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
Tcl_Obj *objPtr, *failVarObj = NULL;
static char *isOptions[] = {
- "alnum", "alpha", "ascii",
- "boolean", "digit", "double",
- "false", "integer", "lower",
- "space", "true", "upper",
- "wordchar", (char *) NULL
+ "alnum", "alpha", "ascii", "control",
+ "boolean", "digit", "double", "false",
+ "graph", "integer", "lower", "print",
+ "punct", "space", "true", "upper",
+ "wordchar", "xdigit", (char *) NULL
};
enum isOptions {
- STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII,
- STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE,
- STR_IS_FALSE, STR_IS_INT, STR_IS_LOWER,
- STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER,
- STR_IS_WORD
+ STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL,
+ STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE,
+ STR_IS_GRAPH, STR_IS_INT, STR_IS_LOWER, STR_IS_PRINT,
+ STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER,
+ STR_IS_WORD, STR_IS_XDIGIT
};
if (objc < 4 || objc > 7) {
@@ -1101,6 +1143,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
result = 0;
}
break;
+ case STR_IS_CONTROL:
+ chcomp = Tcl_UniCharIsControl;
+ break;
case STR_IS_DIGIT:
chcomp = Tcl_UniCharIsDigit;
break;
@@ -1162,6 +1207,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
break;
}
+ case STR_IS_GRAPH:
+ chcomp = Tcl_UniCharIsGraph;
+ break;
case STR_IS_INT: {
char *stop;
@@ -1170,7 +1218,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
break;
}
/*
- * Like STR_IS_DOUBLE, but we don't use strtoul.
+ * Like STR_IS_DOUBLE, but we use strtoul.
* Since Tcl_GetInt already failed, we set result to 0.
*/
result = 0;
@@ -1204,6 +1252,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
case STR_IS_LOWER:
chcomp = Tcl_UniCharIsLower;
break;
+ case STR_IS_PRINT:
+ chcomp = Tcl_UniCharIsPrint;
+ break;
+ case STR_IS_PUNCT:
+ chcomp = Tcl_UniCharIsPunct;
+ break;
case STR_IS_SPACE:
chcomp = Tcl_UniCharIsSpace;
break;
@@ -1213,6 +1267,17 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
case STR_IS_WORD:
chcomp = Tcl_UniCharIsWordChar;
break;
+ case STR_IS_XDIGIT: {
+ for (; string1 < end; string1++, failat++) {
+ /* INTL: We assume unicode is bad for this class */
+ if ((*((unsigned char *)string1) >= 0xC0) ||
+ !isxdigit(*(unsigned char *)string1)) {
+ result = 0;
+ break;
+ }
+ }
+ break;
+ }
}
if (chcomp != NULL) {
for (; string1 < end; string1 += length2, failat++) {
@@ -1238,10 +1303,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
case STR_LAST: {
register char *p;
- int match;
+ int match, utflen, start;
- if (objc != 4) {
- goto badFirstLastArgs;
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "string1 string2 ?startIndex?");
+ return TCL_ERROR;
}
/*
@@ -1249,14 +1316,43 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
*/
match = -1;
+ start = 0;
+ utflen = -1;
string1 = Tcl_GetStringFromObj(objv[2], &length1);
string2 = Tcl_GetStringFromObj(objv[3], &length2);
+
+ if (objc == 5) {
+ /*
+ * If a startIndex is specified, we will need to restrict
+ * the string range to that char index in the string
+ */
+ utflen = Tcl_NumUtfChars(string2, length2);
+ if (TclGetIntForIndex(interp, objv[4], utflen-1,
+ &start) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (start < 0) {
+ goto str_last_done;
+ } else if (start < utflen) {
+ if (length2 == utflen) {
+ /* no unicode chars */
+ p = string2 + start + 1 - length1;
+ } else {
+ p = Tcl_UtfAtIndex(string2, start+1) - length1;
+ }
+ } else {
+ p = string2 + length2 - length1;
+ }
+ } else {
+ p = string2 + length2 - length1;
+ }
+
if (length1 > 0) {
- for (p = string2 + length2 - length1; p >= string2; p--) {
+ for (; p >= string2; p--) {
/*
* Scan backwards to find the first character.
*/
-
+
while ((p != string2) && (*p != *string1)) {
p--;
}
@@ -1271,9 +1367,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
* Compute the character index of the matching string by counting
* the number of characters before the match.
*/
-
+ str_last_done:
if (match != -1) {
- match = Tcl_NumUtfChars(string2, match);
+ if ((objc == 4) || (length2 != utflen)) {
+ /* only check when we've got unicode chars */
+ match = Tcl_NumUtfChars(string2, match);
+ }
}
Tcl_SetIntObj(resultPtr, match);
break;
@@ -1408,14 +1507,30 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
break;
}
case STR_MATCH: {
- if (objc != 4) {
- Tcl_WrongNumArgs(interp, 2, objv, "pattern string");
+ int nocase = 0;
+
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? pattern string");
return TCL_ERROR;
}
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
- string2 = Tcl_GetStringFromObj(objv[3], &length2);
- Tcl_SetBooleanObj(resultPtr, Tcl_StringMatch(string2, string1));
+ if (objc == 5) {
+ string2 = Tcl_GetStringFromObj(objv[2], &length2);
+ if ((length2 > 1) &&
+ strncmp(string2, "-nocase", (size_t) length2) == 0) {
+ nocase = 1;
+ } else {
+ Tcl_AppendStringsToObj(resultPtr, "bad option \"",
+ string2, "\": must be -nocase",
+ (char *) NULL);
+ return TCL_ERROR;
+ }
+ }
+
+ Tcl_SetBooleanObj(resultPtr,
+ Tcl_StringCaseMatch(Tcl_GetString(objv[objc-1]),
+ Tcl_GetString(objv[objc-2]),
+ nocase));
break;
}
case STR_RANGE: {
@@ -1427,20 +1542,20 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
string1 = Tcl_GetStringFromObj(objv[2], &length1);
- length1 = Tcl_NumUtfChars(string1, length1);
- if (TclGetIntForIndex(interp, objv[3], length1 - 1,
+ length1 = Tcl_NumUtfChars(string1, length1) - 1;
+ if (TclGetIntForIndex(interp, objv[3], length1,
&first) != TCL_OK) {
return TCL_ERROR;
}
- if (TclGetIntForIndex(interp, objv[4], length1 - 1,
+ if (TclGetIntForIndex(interp, objv[4], length1,
&last) != TCL_OK) {
return TCL_ERROR;
}
if (first < 0) {
first = 0;
}
- if (last >= length1 - 1) {
- last = length1 - 1;
+ if (last >= length1) {
+ last = length1;
}
if (last >= first) {
char *start, *end;
@@ -1474,8 +1589,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
case STR_REPLACE: {
int first, last;
- if (!(objc == 5 || objc == 6)) {
- Tcl_WrongNumArgs(interp, 2, objv, "string first last ?string?");
+ if (objc < 5 || objc > 6) {
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "string first last ?string?");
return TCL_ERROR;
}
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index ffb080d..a9177a6 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclDecls.h,v 1.12 1999/05/07 20:07:47 stanton Exp $
+ * RCS: @(#) $Id: tclDecls.h,v 1.13 1999/05/22 01:20:12 stanton Exp $
*/
#ifndef _TCLDECLS
@@ -1129,6 +1129,17 @@ EXTERN int Tcl_UtfNcmp _ANSI_ARGS_((CONST char * s1,
/* 370 */
EXTERN int Tcl_UtfNcasecmp _ANSI_ARGS_((CONST char * s1,
CONST char * s2, size_t n));
+/* 371 */
+EXTERN int Tcl_StringCaseMatch _ANSI_ARGS_((CONST char * str,
+ CONST char * pattern, int nocase));
+/* 372 */
+EXTERN int Tcl_UniCharIsControl _ANSI_ARGS_((int ch));
+/* 373 */
+EXTERN int Tcl_UniCharIsGraph _ANSI_ARGS_((int ch));
+/* 374 */
+EXTERN int Tcl_UniCharIsPrint _ANSI_ARGS_((int ch));
+/* 375 */
+EXTERN int Tcl_UniCharIsPunct _ANSI_ARGS_((int ch));
typedef struct TclStubHooks {
struct TclPlatStubs *tclPlatStubs;
@@ -1535,6 +1546,11 @@ typedef struct TclStubs {
int (*tcl_Stat) _ANSI_ARGS_((CONST char * path, struct stat * bufPtr)); /* 368 */
int (*tcl_UtfNcmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 369 */
int (*tcl_UtfNcasecmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 370 */
+ int (*tcl_StringCaseMatch) _ANSI_ARGS_((CONST char * str, CONST char * pattern, int nocase)); /* 371 */
+ int (*tcl_UniCharIsControl) _ANSI_ARGS_((int ch)); /* 372 */
+ int (*tcl_UniCharIsGraph) _ANSI_ARGS_((int ch)); /* 373 */
+ int (*tcl_UniCharIsPrint) _ANSI_ARGS_((int ch)); /* 374 */
+ int (*tcl_UniCharIsPunct) _ANSI_ARGS_((int ch)); /* 375 */
} TclStubs;
#ifdef __cplusplus
@@ -3023,6 +3039,26 @@ extern TclStubs *tclStubsPtr;
#define Tcl_UtfNcasecmp \
(tclStubsPtr->tcl_UtfNcasecmp) /* 370 */
#endif
+#ifndef Tcl_StringCaseMatch
+#define Tcl_StringCaseMatch \
+ (tclStubsPtr->tcl_StringCaseMatch) /* 371 */
+#endif
+#ifndef Tcl_UniCharIsControl
+#define Tcl_UniCharIsControl \
+ (tclStubsPtr->tcl_UniCharIsControl) /* 372 */
+#endif
+#ifndef Tcl_UniCharIsGraph
+#define Tcl_UniCharIsGraph \
+ (tclStubsPtr->tcl_UniCharIsGraph) /* 373 */
+#endif
+#ifndef Tcl_UniCharIsPrint
+#define Tcl_UniCharIsPrint \
+ (tclStubsPtr->tcl_UniCharIsPrint) /* 374 */
+#endif
+#ifndef Tcl_UniCharIsPunct
+#define Tcl_UniCharIsPunct \
+ (tclStubsPtr->tcl_UniCharIsPunct) /* 375 */
+#endif
#endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 3ae0c16..22e1db0 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.c,v 1.4 1999/05/14 02:04:05 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.5 1999/05/22 01:20:13 stanton Exp $
*/
#include "tclInt.h"
@@ -751,7 +751,7 @@ CompileRegexp(interp, string, length, flags)
* a regexp if it has the same pattern and the same flags.
*/
- for (i = 0; i < NUM_REGEXPS; i++) {
+ for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
if ((length == tsdPtr->patLengths[i])
&& (tsdPtr->regexps[i]->flags == flags)
&& (strcmp(string, tsdPtr->patterns[i]) == 0)) {
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 632f4a8..1638f8d 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStubInit.c,v 1.13 1999/05/06 18:46:25 stanton Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.14 1999/05/22 01:20:13 stanton Exp $
*/
#include "tclInt.h"
@@ -686,6 +686,11 @@ TclStubs tclStubs = {
Tcl_Stat, /* 368 */
Tcl_UtfNcmp, /* 369 */
Tcl_UtfNcasecmp, /* 370 */
+ Tcl_StringCaseMatch, /* 371 */
+ Tcl_UniCharIsControl, /* 372 */
+ Tcl_UniCharIsGraph, /* 373 */
+ Tcl_UniCharIsPrint, /* 374 */
+ Tcl_UniCharIsPunct, /* 375 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 2361a2e..635ffbe 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtf.c,v 1.6 1999/05/20 23:40:34 hershey Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.7 1999/05/22 01:20:13 stanton Exp $
*/
#include "tclInt.h"
@@ -35,6 +35,16 @@
#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION)
+/*
+ * Categories accepted by the print, graph and punct tests below: bit N of
+ * PRINT_BITS is set when Unicode general category N counts as printable.
+ */
+
+#define PRINT_BITS \
+    (ALPHA_BITS | DIGIT_BITS | SPACE_BITS \
+    | (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) \
+    | (1 << COMBINING_SPACING_MARK) \
+    | (1 << LETTER_NUMBER) | (1 << OTHER_NUMBER) \
+    | (1 << CONNECTOR_PUNCTUATION) | (1 << DASH_PUNCTUATION) \
+    | (1 << OPEN_PUNCTUATION) | (1 << CLOSE_PUNCTUATION) \
+    | (1 << INITIAL_QUOTE_PUNCTUATION) | (1 << FINAL_QUOTE_PUNCTUATION) \
+    | (1 << OTHER_PUNCTUATION) \
+    | (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) \
+    | (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL))
+
/*
* Unicode characters less than this value are represented by themselves
* in UTF-8 strings.
@@ -1341,6 +1351,29 @@ Tcl_UniCharIsAlpha(ch)
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsControl --
+ *
+ * Test if a character is a Unicode control character.
+ *
+ * Results:
+ * Returns non-zero if character is a control.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsControl(ch)
+    int ch;                     /* Unicode character to test. */
+{
+    /*
+     * Reduce the character's info to its category and compare that
+     * against the control category.
+     */
+
+    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+
+    if (category == CONTROL) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsDigit --
*
* Test if a character is a numeric Unicode character.
@@ -1365,6 +1398,30 @@ Tcl_UniCharIsDigit(ch)
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsGraph --
+ *
+ * Test if a character is any Unicode print character except space.
+ *
+ * Results:
+ * Returns non-zero if character is printable, but not space.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsGraph(ch)
+    int ch;                     /* Unicode character to test. */
+{
+    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+
+    /*
+     * A graphic character is a printable one other than ' '.  The space
+     * test is made on the full code point: narrowing ch to unsigned char
+     * first would make every character whose low byte is 0x20 (for
+     * example U+0120) compare equal to ' ' and be rejected.
+     */
+
+    return (((PRINT_BITS >> category) & 1) && (ch != ' '));
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsLower --
*
* Test if a character is a lowercase Unicode character.
@@ -1388,6 +1445,55 @@ Tcl_UniCharIsLower(ch)
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsPrint --
+ *
+ * Test if a character is a Unicode print character.
+ *
+ * Results:
+ * Returns non-zero if character is printable.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsPrint(ch)
+    int ch;                     /* Unicode character to test. */
+{
+    register int category;
+
+    /*
+     * Look up the character's category and test the matching bit of
+     * the printable-category mask.
+     */
+
+    category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+    if ((PRINT_BITS >> category) & 1) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharIsPunct --
+ *
+ * Test for any printing char that is neither a space nor an alnum.
+ *
+ * Results:
+ * Returns non-zero if character is punct.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsPunct(ch)
+    int ch;                     /* Unicode character to test. */
+{
+    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+
+    /*
+     * Punctuation here means: in a printable category, not ' ', and not
+     * in an alphabetic or digit category.  The space test is made on the
+     * full code point; narrowing ch to unsigned char first would wrongly
+     * reject characters such as U+2020 whose low byte is 0x20.
+     */
+
+    return (((PRINT_BITS >> category) & 1) && (ch != ' ')
+            && !(((ALPHA_BITS | DIGIT_BITS) >> category) & 1));
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsSpace --
*
* Test if a character is a whitespace Unicode character.
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index fa4c22f..d60e409 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtil.c,v 1.11 1999/05/06 19:21:11 stanton Exp $
+ * RCS: @(#) $Id: tclUtil.c,v 1.12 1999/05/22 01:20:13 stanton Exp $
*/
#include "tclInt.h"
@@ -1234,6 +1234,167 @@ Tcl_StringMatch(string, pattern)
/*
*----------------------------------------------------------------------
*
+ * Tcl_StringCaseMatch --
+ *
+ * See if a particular string matches a particular pattern.
+ * Allows case insensitivity.
+ *
+ * Results:
+ * The return value is 1 if string matches pattern, and
+ * 0 otherwise. The matching operation permits the following
+ * special characters in the pattern: *?\[] (see the manual
+ * entry for details on what these mean).
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_StringCaseMatch(string, pattern, nocase)
+    CONST char *string;         /* String. */
+    CONST char *pattern;        /* Pattern, which may contain special
+                                 * characters. */
+    int nocase;                 /* 0 for case sensitive, 1 for insensitive */
+{
+    int p, s;
+    CONST char *pstart = pattern;
+    Tcl_UniChar ch1, ch2;
+
+    while (1) {
+        p = *pattern;
+        s = *string;
+
+        /*
+         * See if we're at the end of both the pattern and the string.  If
+         * so, we succeeded.  If we're at the end of the pattern but not at
+         * the end of the string, we failed.
+         */
+
+        if (p == '\0') {
+            return (s == '\0');
+        }
+        if ((s == '\0') && (p != '*')) {
+            return 0;
+        }
+
+        /*
+         * Check for a "*" as the next pattern character.  It matches
+         * any substring.  We handle this by calling ourselves
+         * recursively for each postfix of string, until either we
+         * match or we reach the end of the string.
+         */
+
+        if (p == '*') {
+            pattern++;
+            if (*pattern == '\0') {
+                return 1;
+            }
+            while (1) {
+                if (Tcl_StringCaseMatch(string, pattern, nocase)) {
+                    return 1;
+                }
+                if (*string == '\0') {
+                    return 0;
+                }
+
+                /*
+                 * Advance by one complete UTF-8 character rather than one
+                 * byte, so the rest of the pattern is never tried starting
+                 * in the middle of a multi-byte sequence.
+                 */
+
+                string += Tcl_UtfToUniChar(string, &ch1);
+            }
+        }
+
+        /*
+         * Check for a "?" as the next pattern character.  It matches
+         * any single character.
+         */
+
+        if (p == '?') {
+            pattern++;
+            string += Tcl_UtfToUniChar(string, &ch1);
+            continue;
+        }
+
+        /*
+         * Check for a "[" as the next pattern character.  It is followed
+         * by a list of characters that are acceptable, or by a range
+         * (two characters separated by "-").
+         */
+
+        if (p == '[') {
+            Tcl_UniChar startChar, endChar;
+
+            pattern++;
+            string += Tcl_UtfToUniChar(string, &ch1);
+            if (nocase) {
+                ch1 = Tcl_UniCharToLower(ch1);
+            }
+            while (1) {
+                /*
+                 * Running out of set members without a hit is a failure.
+                 */
+
+                if ((*pattern == ']') || (*pattern == '\0')) {
+                    return 0;
+                }
+                pattern += Tcl_UtfToUniChar(pattern, &startChar);
+                if (nocase) {
+                    startChar = Tcl_UniCharToLower(startChar);
+                }
+                if (*pattern == '-') {
+                    pattern++;
+                    if (*pattern == '\0') {
+                        return 0;
+                    }
+                    pattern += Tcl_UtfToUniChar(pattern, &endChar);
+                    if (nocase) {
+                        endChar = Tcl_UniCharToLower(endChar);
+                    }
+                    if (((startChar <= ch1) && (ch1 <= endChar))
+                            || ((endChar <= ch1) && (ch1 <= startChar))) {
+                        /*
+                         * Matches ranges of form [a-z] or [z-a].
+                         */
+
+                        break;
+                    }
+                } else if (startChar == ch1) {
+                    break;
+                }
+            }
+
+            /*
+             * The character matched; skip the remainder of the set up to
+             * and including the closing "]".
+             *
+             * NOTE(review): for an unterminated set this backs up one
+             * character and the pattern++ below then steps forward one
+             * byte; confirm that is the intended landing spot when the
+             * pattern's last character is multi-byte.
+             */
+
+            while (*pattern != ']') {
+                if (*pattern == '\0') {
+                    pattern = Tcl_UtfPrev(pattern, pstart);
+                    break;
+                }
+                pattern++;
+            }
+            pattern++;
+            continue;
+        }
+
+        /*
+         * If the next pattern character is '\', just strip off the '\'
+         * so we do exact matching on the character that follows.
+         */
+
+        if (p == '\\') {
+            pattern++;
+            p = *pattern;
+            if (p == '\0') {
+                return 0;
+            }
+        }
+
+        /*
+         * There's no special character.  Just make sure that the next
+         * characters of each string match.
+         */
+
+        string += Tcl_UtfToUniChar(string, &ch1);
+        pattern += Tcl_UtfToUniChar(pattern, &ch2);
+        if (nocase) {
+            if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
+                return 0;
+            }
+        } else if (ch1 != ch2) {
+            return 0;
+        }
+    }
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_DStringInit --
*
* Initializes a dynamic string, discarding any previous contents