diff options
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tcl.decls | 17 | ||||
-rw-r--r-- | generic/tcl.h | 18 | ||||
-rw-r--r-- | generic/tclCmdAH.c | 11 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 212 | ||||
-rw-r--r-- | generic/tclDecls.h | 38 | ||||
-rw-r--r-- | generic/tclRegexp.c | 4 | ||||
-rw-r--r-- | generic/tclStubInit.c | 7 | ||||
-rw-r--r-- | generic/tclUtf.c | 108 | ||||
-rw-r--r-- | generic/tclUtil.c | 163 |
9 files changed, 520 insertions, 58 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls index d77b076..ae5d445 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -10,7 +10,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: tcl.decls,v 1.12 1999/05/07 20:07:46 stanton Exp $ +# RCS: @(#) $Id: tcl.decls,v 1.13 1999/05/22 01:20:11 stanton Exp $ library tcl @@ -1262,6 +1262,21 @@ declare 369 generic { declare 370 generic { int Tcl_UtfNcasecmp(CONST char *s1, CONST char *s2, size_t n) } +declare 371 generic { + int Tcl_StringCaseMatch(CONST char *str, CONST char *pattern, int nocase) +} +declare 372 generic { + int Tcl_UniCharIsControl(int ch) +} +declare 373 generic { + int Tcl_UniCharIsGraph(int ch) +} +declare 374 generic { + int Tcl_UniCharIsPrint(int ch) +} +declare 375 generic { + int Tcl_UniCharIsPunct(int ch) +} ############################################################################## diff --git a/generic/tcl.h b/generic/tcl.h index 9524f87..829f0b7 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -12,13 +12,21 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tcl.h,v 1.43 1999/04/30 23:35:40 stanton Exp $ + * RCS: @(#) $Id: tcl.h,v 1.44 1999/05/22 01:20:11 stanton Exp $ */ #ifndef _TCL #define _TCL /* + * For C++ compilers, use extern "C" + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* * The following defines are used to indicate the various release levels. */ @@ -1543,4 +1551,12 @@ EXTERN int Tcl_AppInit _ANSI_ARGS_((Tcl_Interp *interp)); #undef TCL_STORAGE_CLASS #define TCL_STORAGE_CLASS DLLIMPORT +/* + * end block for C++ + */ + +#ifdef __cplusplus +} +#endif + #endif /* _TCL */ diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 8aa6880..d59dfeb 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdAH.c,v 1.5 1999/04/16 00:46:43 stanton Exp $ + * RCS: @(#) $Id: tclCmdAH.c,v 1.6 1999/05/22 01:20:12 stanton Exp $ */ #include "tclInt.h" @@ -1517,7 +1517,7 @@ GetTypeFromMode(mode) /* *---------------------------------------------------------------------- * - * Tcl_FoObjCmd -- + * Tcl_ForObjCmd -- * * This procedure is invoked to process the "for" Tcl command. * See the user documentation for details on what it does. @@ -1559,6 +1559,13 @@ Tcl_ForObjCmd(dummy, interp, objc, objv) return result; } while (1) { + /* + * We need to reset the result before passing it off to + * Tcl_ExprBooleanObj. Otherwise, any error message will be appended + * to the result of the last evaluation. + */ + + Tcl_ResetResult(interp); result = Tcl_ExprBooleanObj(interp, objv[2], &value); if (result != TCL_OK) { return result; diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index dc5607c..5488773 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.8 1999/05/06 22:50:03 stanton Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.9 1999/05/22 01:20:12 stanton Exp $ */ #include "tclInt.h" @@ -896,7 +896,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } if (nocase) { match = Tcl_UtfNcasecmp(string1, string2, - (unsigned)length); + (unsigned) length); } else { match = Tcl_UtfNcmp(string1, string2, (unsigned) length); @@ -912,7 +912,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } if ((enum options) index == STR_EQUAL) { - Tcl_SetIntObj(resultPtr, (match) ? 0 : 1); + Tcl_SetBooleanObj(resultPtr, (match) ? 0 : 1); } else { Tcl_SetIntObj(resultPtr, ((match > 0) ? 1 : (match < 0) ? -1 : 0)); @@ -921,21 +921,47 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } case STR_FIRST: { register char *p, *end; - int match; + int match, utflen, start; - if (objc != 4) { - badFirstLastArgs: - Tcl_WrongNumArgs(interp, 2, objv, "string1 string2"); + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, + "string1 string2 ?startIndex?"); return TCL_ERROR; } /* * This algorithm fails on improperly formed UTF strings. + * We are searching string2 for the sequence string1. */ match = -1; + start = 0; + utflen = -1; string1 = Tcl_GetStringFromObj(objv[2], &length1); string2 = Tcl_GetStringFromObj(objv[3], &length2); + + if (objc == 5) { + /* + * If a startIndex is specified, we will need to fast forward + * to that point in the string before we think about a match + */ + utflen = Tcl_NumUtfChars(string2, length2); + if (TclGetIntForIndex(interp, objv[4], utflen-1, + &start) != TCL_OK) { + return TCL_ERROR; + } + if (start >= utflen) { + goto str_first_done; + } else if (start > 0) { + if (length2 == utflen) { + /* no unicode chars */ + string2 += start; + } else { + string2 = Tcl_UtfAtIndex(string2, start); + } + } + } + if (length1 > 0) { end = string2 + length2 - length1 + 1; for (p = string2; p < end; p++) { @@ -955,19 +981,25 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } /* - * Compute the character index of the matching string by counting - * the number of characters before the match. + * Compute the character index of the matching string by + * counting the number of characters before the match. */ - + str_first_done: if (match != -1) { - match = Tcl_NumUtfChars(string2, match); + if (objc == 4) { + match = Tcl_NumUtfChars(string2, match); + } else if (length2 == utflen) { + /* no unicode chars */ + match += start; + } else { + match = start + Tcl_NumUtfChars(string2, match); + } } Tcl_SetIntObj(resultPtr, match); break; } case STR_INDEX: { int index; - char buf[TCL_UTF_MAX]; if (objc != 4) { Tcl_WrongNumArgs(interp, 2, objv, "string charIndex"); @@ -977,15 +1009,25 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) /* * establish what 'end' really means */ - length2 = Tcl_NumUtfChars(string1, length1) - 1; - if (TclGetIntForIndex(interp, objv[3], length2, + length2 = Tcl_NumUtfChars(string1, length1); + if (TclGetIntForIndex(interp, objv[3], length2 - 1, &index) != TCL_OK) { return TCL_ERROR; } - if ((index >= 0) && (index < length1)) { - length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1, - index), buf); - Tcl_SetStringObj(resultPtr, buf, length2); + /* + * index must be between 0 and the UTF length to be valid + */ + if ((index >= 0) && (index < length2)) { + if (length1 == length2) { + /* no unicode chars */ + Tcl_SetStringObj(resultPtr, string1+index, 1); + } else { + char buf[TCL_UTF_MAX]; + + length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1, + index), buf); + Tcl_SetStringObj(resultPtr, buf, length2); + } } break; } @@ -997,18 +1039,18 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) Tcl_Obj *objPtr, *failVarObj = NULL; static char *isOptions[] = { - "alnum", "alpha", "ascii", - "boolean", "digit", "double", - "false", "integer", "lower", - "space", "true", "upper", - "wordchar", (char *) NULL + "alnum", "alpha", "ascii", "control", + "boolean", "digit", "double", "false", + "graph", "integer", "lower", "print", + "punct", "space", "true", "upper", + "wordchar", "xdigit", (char *) NULL }; enum isOptions { - STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, - STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, - STR_IS_FALSE, STR_IS_INT, STR_IS_LOWER, - STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER, - STR_IS_WORD + STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL, + STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE, + STR_IS_GRAPH, STR_IS_INT, STR_IS_LOWER, STR_IS_PRINT, + STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER, + STR_IS_WORD, STR_IS_XDIGIT }; if (objc < 4 || objc > 7) { @@ -1101,6 +1143,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) result = 0; } break; + case STR_IS_CONTROL: + chcomp = Tcl_UniCharIsControl; + break; case STR_IS_DIGIT: chcomp = Tcl_UniCharIsDigit; break; @@ -1162,6 +1207,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } break; } + case STR_IS_GRAPH: + chcomp = Tcl_UniCharIsGraph; + break; case STR_IS_INT: { char *stop; @@ -1170,7 +1218,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) break; } /* - * Like STR_IS_DOUBLE, but we don't use strtoul. + * Like STR_IS_DOUBLE, but we use strtoul. * Since Tcl_GetInt already failed, we set result to 0. */ result = 0; @@ -1204,6 +1252,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) case STR_IS_LOWER: chcomp = Tcl_UniCharIsLower; break; + case STR_IS_PRINT: + chcomp = Tcl_UniCharIsPrint; + break; + case STR_IS_PUNCT: + chcomp = Tcl_UniCharIsPunct; + break; case STR_IS_SPACE: chcomp = Tcl_UniCharIsSpace; break; @@ -1213,6 +1267,17 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) case STR_IS_WORD: chcomp = Tcl_UniCharIsWordChar; break; + case STR_IS_XDIGIT: { + for (; string1 < end; string1++, failat++) { + /* INTL: We assume unicode is bad for this class */ + if ((*((unsigned char *)string1) >= 0xC0) || + !isxdigit(*(unsigned char *)string1)) { + result = 0; + break; + } + } + break; + } } if (chcomp != NULL) { for (; string1 < end; string1 += length2, failat++) { @@ -1238,10 +1303,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } case STR_LAST: { register char *p; - int match; + int match, utflen, start; - if (objc != 4) { - goto badFirstLastArgs; + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, + "string1 string2 ?startIndex?"); + return TCL_ERROR; } /* @@ -1249,14 +1316,43 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) */ match = -1; + start = 0; + utflen = -1; string1 = Tcl_GetStringFromObj(objv[2], &length1); string2 = Tcl_GetStringFromObj(objv[3], &length2); + + if (objc == 5) { + /* + * If a startIndex is specified, we will need to restrict + * the string range to that char index in the string + */ + utflen = Tcl_NumUtfChars(string2, length2); + if (TclGetIntForIndex(interp, objv[4], utflen-1, + &start) != TCL_OK) { + return TCL_ERROR; + } + if (start < 0) { + goto str_last_done; + } else if (start < utflen) { + if (length2 == utflen) { + /* no unicode chars */ + p = string2 + start + 1 - length1; + } else { + p = Tcl_UtfAtIndex(string2, start+1) - length1; + } + } else { + p = string2 + length2 - length1; + } + } else { + p = string2 + length2 - length1; + } + if (length1 > 0) { - for (p = string2 + length2 - length1; p >= string2; p--) { + for (; p >= string2; p--) { /* * Scan backwards to find the first character. */ - + while ((p != string2) && (*p != *string1)) { p--; } @@ -1271,9 +1367,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) * Compute the character index of the matching string by counting * the number of characters before the match. */ - + str_last_done: if (match != -1) { - match = Tcl_NumUtfChars(string2, match); + if ((objc == 4) || (length2 != utflen)) { + /* only check when we've got unicode chars */ + match = Tcl_NumUtfChars(string2, match); + } } Tcl_SetIntObj(resultPtr, match); break; @@ -1408,14 +1507,30 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) break; } case STR_MATCH: { - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "pattern string"); + int nocase = 0; + + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? pattern string"); return TCL_ERROR; } - string1 = Tcl_GetStringFromObj(objv[2], &length1); - string2 = Tcl_GetStringFromObj(objv[3], &length2); - Tcl_SetBooleanObj(resultPtr, Tcl_StringMatch(string2, string1)); + if (objc == 5) { + string2 = Tcl_GetStringFromObj(objv[2], &length2); + if ((length2 > 1) && + strncmp(string2, "-nocase", (size_t) length2) == 0) { + nocase = 1; + } else { + Tcl_AppendStringsToObj(resultPtr, "bad option \"", + string2, "\": must be -nocase", + (char *) NULL); + return TCL_ERROR; + } + } + + Tcl_SetBooleanObj(resultPtr, + Tcl_StringCaseMatch(Tcl_GetString(objv[objc-1]), + Tcl_GetString(objv[objc-2]), + nocase)); break; } case STR_RANGE: { @@ -1427,20 +1542,20 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } string1 = Tcl_GetStringFromObj(objv[2], &length1); - length1 = Tcl_NumUtfChars(string1, length1); - if (TclGetIntForIndex(interp, objv[3], length1 - 1, + length1 = Tcl_NumUtfChars(string1, length1) - 1; + if (TclGetIntForIndex(interp, objv[3], length1, &first) != TCL_OK) { return TCL_ERROR; } - if (TclGetIntForIndex(interp, objv[4], length1 - 1, + if (TclGetIntForIndex(interp, objv[4], length1, &last) != TCL_OK) { return TCL_ERROR; } if (first < 0) { first = 0; } - if (last >= length1 - 1) { - last = length1 - 1; + if (last >= length1) { + last = length1; } if (last >= first) { char *start, *end; @@ -1474,8 +1589,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) case STR_REPLACE: { int first, last; - if (!(objc == 5 || objc == 6)) { - Tcl_WrongNumArgs(interp, 2, objv, "string first last ?string?"); + if (objc < 5 || objc > 6) { + Tcl_WrongNumArgs(interp, 2, objv, + "string first last ?string?"); return TCL_ERROR; } diff --git a/generic/tclDecls.h b/generic/tclDecls.h index ffb080d..a9177a6 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclDecls.h,v 1.12 1999/05/07 20:07:47 stanton Exp $ + * RCS: @(#) $Id: tclDecls.h,v 1.13 1999/05/22 01:20:12 stanton Exp $ */ #ifndef _TCLDECLS @@ -1129,6 +1129,17 @@ EXTERN int Tcl_UtfNcmp _ANSI_ARGS_((CONST char * s1, /* 370 */ EXTERN int Tcl_UtfNcasecmp _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); +/* 371 */ +EXTERN int Tcl_StringCaseMatch _ANSI_ARGS_((CONST char * str, + CONST char * pattern, int nocase)); +/* 372 */ +EXTERN int Tcl_UniCharIsControl _ANSI_ARGS_((int ch)); +/* 373 */ +EXTERN int Tcl_UniCharIsGraph _ANSI_ARGS_((int ch)); +/* 374 */ +EXTERN int Tcl_UniCharIsPrint _ANSI_ARGS_((int ch)); +/* 375 */ +EXTERN int Tcl_UniCharIsPunct _ANSI_ARGS_((int ch)); typedef struct TclStubHooks { struct TclPlatStubs *tclPlatStubs; @@ -1535,6 +1546,11 @@ typedef struct TclStubs { int (*tcl_Stat) _ANSI_ARGS_((CONST char * path, struct stat * bufPtr)); /* 368 */ int (*tcl_UtfNcmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 369 */ int (*tcl_UtfNcasecmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 370 */ + int (*tcl_StringCaseMatch) _ANSI_ARGS_((CONST char * str, CONST char * pattern, int nocase)); /* 371 */ + int (*tcl_UniCharIsControl) _ANSI_ARGS_((int ch)); /* 372 */ + int (*tcl_UniCharIsGraph) _ANSI_ARGS_((int ch)); /* 373 */ + int (*tcl_UniCharIsPrint) _ANSI_ARGS_((int ch)); /* 374 */ + int (*tcl_UniCharIsPunct) _ANSI_ARGS_((int ch)); /* 375 */ } TclStubs; #ifdef __cplusplus @@ -3023,6 +3039,26 @@ extern TclStubs *tclStubsPtr; #define Tcl_UtfNcasecmp \ (tclStubsPtr->tcl_UtfNcasecmp) /* 370 */ #endif +#ifndef Tcl_StringCaseMatch +#define Tcl_StringCaseMatch \ + (tclStubsPtr->tcl_StringCaseMatch) /* 371 */ +#endif +#ifndef Tcl_UniCharIsControl +#define Tcl_UniCharIsControl \ + (tclStubsPtr->tcl_UniCharIsControl) /* 372 */ +#endif +#ifndef Tcl_UniCharIsGraph +#define Tcl_UniCharIsGraph \ + (tclStubsPtr->tcl_UniCharIsGraph) /* 373 */ +#endif +#ifndef Tcl_UniCharIsPrint +#define Tcl_UniCharIsPrint \ + (tclStubsPtr->tcl_UniCharIsPrint) /* 374 */ +#endif +#ifndef Tcl_UniCharIsPunct +#define Tcl_UniCharIsPunct \ + (tclStubsPtr->tcl_UniCharIsPunct) /* 375 */ +#endif #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */ diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 3ae0c16..22e1db0 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.c,v 1.4 1999/05/14 02:04:05 stanton Exp $ + * RCS: @(#) $Id: tclRegexp.c,v 1.5 1999/05/22 01:20:13 stanton Exp $ */ #include "tclInt.h" @@ -751,7 +751,7 @@ CompileRegexp(interp, string, length, flags) * a regexp if it has the same pattern and the same flags. */ - for (i = 0; i < NUM_REGEXPS; i++) { + for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) { if ((length == tsdPtr->patLengths[i]) && (tsdPtr->regexps[i]->flags == flags) && (strcmp(string, tsdPtr->patterns[i]) == 0)) { diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 632f4a8..1638f8d 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStubInit.c,v 1.13 1999/05/06 18:46:25 stanton Exp $ + * RCS: @(#) $Id: tclStubInit.c,v 1.14 1999/05/22 01:20:13 stanton Exp $ */ #include "tclInt.h" @@ -686,6 +686,11 @@ TclStubs tclStubs = { Tcl_Stat, /* 368 */ Tcl_UtfNcmp, /* 369 */ Tcl_UtfNcasecmp, /* 370 */ + Tcl_StringCaseMatch, /* 371 */ + Tcl_UniCharIsControl, /* 372 */ + Tcl_UniCharIsGraph, /* 373 */ + Tcl_UniCharIsPrint, /* 374 */ + Tcl_UniCharIsPunct, /* 375 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 2361a2e..635ffbe 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.6 1999/05/20 23:40:34 hershey Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.7 1999/05/22 01:20:13 stanton Exp $ */ #include "tclInt.h" @@ -35,6 +35,16 @@ #define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION) +#define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | \ + (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \ + (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \ + (1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \ + (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ + (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ + (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \ + (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \ + (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL)) + /* * Unicode characters less than this value are represented by themselves * in UTF-8 strings. @@ -1341,6 +1351,29 @@ Tcl_UniCharIsAlpha(ch) /* *---------------------------------------------------------------------- * + * Tcl_UniCharIsControl -- + * + * Test if a character is a Unicode control character. + * + * Results: + * Returns non-zero if character is a control. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_UniCharIsControl(ch) + int ch; /* Unicode character to test. */ +{ + return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL); +} + +/* + *---------------------------------------------------------------------- + * * Tcl_UniCharIsDigit -- * * Test if a character is a numeric Unicode character. @@ -1365,6 +1398,30 @@ Tcl_UniCharIsDigit(ch) /* *---------------------------------------------------------------------- * + * Tcl_UniCharIsGraph -- + * + * Test if a character is any Unicode print character except space. + * + * Results: + * Returns non-zero if character is printable, but not space. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_UniCharIsGraph(ch) + int ch; /* Unicode character to test. */ +{ + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); + return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' ')); +} + +/* + *---------------------------------------------------------------------- + * * Tcl_UniCharIsLower -- * * Test if a character is a lowercase Unicode character. @@ -1388,6 +1445,55 @@ Tcl_UniCharIsLower(ch) /* *---------------------------------------------------------------------- * + * Tcl_UniCharIsPrint -- + * + * Test if a character is a Unicode print character. + * + * Results: + * Returns non-zero if character is printable. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_UniCharIsPrint(ch) + int ch; /* Unicode character to test. */ +{ + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); + return ((PRINT_BITS >> category) & 1); +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_UniCharIsPunct -- + * + * Test if for any printing char that is neither space or an alnum. + * + * Results: + * Returns non-zero if character is punct. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_UniCharIsPunct(ch) + int ch; /* Unicode character to test. */ +{ + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); + return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' ') + && !(((ALPHA_BITS | DIGIT_BITS) >> category) & 1)); +} + +/* + *---------------------------------------------------------------------- + * * Tcl_UniCharIsSpace -- * * Test if a character is a whitespace Unicode character. diff --git a/generic/tclUtil.c b/generic/tclUtil.c index fa4c22f..d60e409 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtil.c,v 1.11 1999/05/06 19:21:11 stanton Exp $ + * RCS: @(#) $Id: tclUtil.c,v 1.12 1999/05/22 01:20:13 stanton Exp $ */ #include "tclInt.h" @@ -1234,6 +1234,167 @@ Tcl_StringMatch(string, pattern) /* *---------------------------------------------------------------------- * + * Tcl_StringCaseMatch -- + * + * See if a particular string matches a particular pattern. + * Allows case insensitivity. + * + * Results: + * The return value is 1 if string matches pattern, and + * 0 otherwise. The matching operation permits the following + * special characters in the pattern: *?\[] (see the manual + * entry for details on what these mean). + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_StringCaseMatch(string, pattern, nocase) + CONST char *string; /* String. */ + CONST char *pattern; /* Pattern, which may contain special + * characters. */ + int nocase; /* 0 for case sensitive, 1 for insensitive */ +{ + int p, s; + CONST char *pstart = pattern; + Tcl_UniChar ch1, ch2; + + while (1) { + p = *pattern; + s = *string; + + /* + * See if we're at the end of both the pattern and the string. If + * so, we succeeded. If we're at the end of the pattern but not at + * the end of the string, we failed. + */ + + if (p == '\0') { + return (s == '\0'); + } + if ((s == '\0') && (p != '*')) { + return 0; + } + + /* Check for a "*" as the next pattern character. It matches + * any substring. We handle this by calling ourselves + * recursively for each postfix of string, until either we + * match or we reach the end of the string. + */ + + if (p == '*') { + pattern++; + if (*pattern == '\0') { + return 1; + } + while (1) { + if (Tcl_StringCaseMatch(string, pattern, nocase)) { + return 1; + } + if (*string == '\0') { + return 0; + } + string++; + } + } + + /* Check for a "?" as the next pattern character. It matches + * any single character. + */ + + if (p == '?') { + pattern++; + string += Tcl_UtfToUniChar(string, &ch1); + continue; + } + + /* Check for a "[" as the next pattern character. It is followed + * by a list of characters that are acceptable, or by a range + * (two characters separated by "-"). + */ + + if (p == '[') { + Tcl_UniChar startChar, endChar; + + pattern++; + string += Tcl_UtfToUniChar(string, &ch1); + if (nocase) { + ch1 = Tcl_UniCharToLower(ch1); + } + while (1) { + if ((*pattern == ']') || (*pattern == '\0')) { + return 0; + } + pattern += Tcl_UtfToUniChar(pattern, &startChar); + if (nocase) { + startChar = Tcl_UniCharToLower(startChar); + } + if (*pattern == '-') { + pattern++; + if (*pattern == '\0') { + return 0; + } + pattern += Tcl_UtfToUniChar(pattern, &endChar); + if (nocase) { + endChar = Tcl_UniCharToLower(endChar); + } + if (((startChar <= ch1) && (ch1 <= endChar)) + || ((endChar <= ch1) && (ch1 <= startChar))) { + /* + * Matches ranges of form [a-z] or [z-a]. + */ + + break; + } + } else if (startChar == ch1) { + break; + } + } + while (*pattern != ']') { + if (*pattern == '\0') { + pattern = Tcl_UtfPrev(pattern, pstart); + break; + } + pattern++; + } + pattern++; + continue; + } + + /* If the next pattern character is '\', just strip off the '\' + * so we do exact matching on the character that follows. + */ + + if (p == '\\') { + pattern++; + p = *pattern; + if (p == '\0') { + return 0; + } + } + + /* There's no special character. Just make sure that the next + * bytes of each string match. + */ + + string += Tcl_UtfToUniChar(string, &ch1); + pattern += Tcl_UtfToUniChar(pattern, &ch2); + if (nocase) { + if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) { + return 0; + } + } else if (ch1 != ch2) { + return 0; + } + } +} + +/* + *---------------------------------------------------------------------- + * * Tcl_DStringInit -- * * Initializes a dynamic string, discarding any previous contents |