diff options
author | stanton <stanton> | 1999-05-22 01:20:10 (GMT) |
---|---|---|
committer | stanton <stanton> | 1999-05-22 01:20:10 (GMT) |
commit | ac39508cf97576cd9747c5630c4a13d794663b4a (patch) | |
tree | 4b7c61e6c670f227cf4d603907157fb6246d2d50 /generic/tclCmdMZ.c | |
parent | 21bd132482f68735f5a4381934f56ee911904e87 (diff) | |
download | tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.zip tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.tar.gz tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.tar.bz2 |
Merged changes from scriptics-tclpro-1-3-b2 branch
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r-- | generic/tclCmdMZ.c | 212 |
1 files changed, 164 insertions, 48 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index dc5607c..5488773 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.8 1999/05/06 22:50:03 stanton Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.9 1999/05/22 01:20:12 stanton Exp $ */ #include "tclInt.h" @@ -896,7 +896,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } if (nocase) { match = Tcl_UtfNcasecmp(string1, string2, - (unsigned)length); + (unsigned) length); } else { match = Tcl_UtfNcmp(string1, string2, (unsigned) length); @@ -912,7 +912,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } if ((enum options) index == STR_EQUAL) { - Tcl_SetIntObj(resultPtr, (match) ? 0 : 1); + Tcl_SetBooleanObj(resultPtr, (match) ? 0 : 1); } else { Tcl_SetIntObj(resultPtr, ((match > 0) ? 1 : (match < 0) ? -1 : 0)); @@ -921,21 +921,47 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } case STR_FIRST: { register char *p, *end; - int match; + int match, utflen, start; - if (objc != 4) { - badFirstLastArgs: - Tcl_WrongNumArgs(interp, 2, objv, "string1 string2"); + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, + "string1 string2 ?startIndex?"); return TCL_ERROR; } /* * This algorithm fails on improperly formed UTF strings. + * We are searching string2 for the sequence string1. */ match = -1; + start = 0; + utflen = -1; string1 = Tcl_GetStringFromObj(objv[2], &length1); string2 = Tcl_GetStringFromObj(objv[3], &length2); + + if (objc == 5) { + /* + * If a startIndex is specified, we will need to fast forward + * to that point in the string before we think about a match + */ + utflen = Tcl_NumUtfChars(string2, length2); + if (TclGetIntForIndex(interp, objv[4], utflen-1, + &start) != TCL_OK) { + return TCL_ERROR; + } + if (start >= utflen) { + goto str_first_done; + } else if (start > 0) { + if (length2 == utflen) { + /* no unicode chars */ + string2 += start; + } else { + string2 = Tcl_UtfAtIndex(string2, start); + } + } + } + if (length1 > 0) { end = string2 + length2 - length1 + 1; for (p = string2; p < end; p++) { @@ -955,19 +981,25 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } /* - * Compute the character index of the matching string by counting - * the number of characters before the match. + * Compute the character index of the matching string by + * counting the number of characters before the match. */ - + str_first_done: if (match != -1) { - match = Tcl_NumUtfChars(string2, match); + if (objc == 4) { + match = Tcl_NumUtfChars(string2, match); + } else if (length2 == utflen) { + /* no unicode chars */ + match += start; + } else { + match = start + Tcl_NumUtfChars(string2, match); + } } Tcl_SetIntObj(resultPtr, match); break; } case STR_INDEX: { int index; - char buf[TCL_UTF_MAX]; if (objc != 4) { Tcl_WrongNumArgs(interp, 2, objv, "string charIndex"); @@ -977,15 +1009,25 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) /* * establish what 'end' really means */ - length2 = Tcl_NumUtfChars(string1, length1) - 1; - if (TclGetIntForIndex(interp, objv[3], length2, + length2 = Tcl_NumUtfChars(string1, length1); + if (TclGetIntForIndex(interp, objv[3], length2 - 1, &index) != TCL_OK) { return TCL_ERROR; } - if ((index >= 0) && (index < length1)) { - length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1, - index), buf); - Tcl_SetStringObj(resultPtr, buf, length2); + /* + * index must be between 0 and the UTF length to be valid + */ + if ((index >= 0) && (index < length2)) { + if (length1 == length2) { + /* no unicode chars */ + Tcl_SetStringObj(resultPtr, string1+index, 1); + } else { + char buf[TCL_UTF_MAX]; + + length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1, + index), buf); + Tcl_SetStringObj(resultPtr, buf, length2); + } } break; } @@ -997,18 +1039,18 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) Tcl_Obj *objPtr, *failVarObj = NULL; static char *isOptions[] = { - "alnum", "alpha", "ascii", - "boolean", "digit", "double", - "false", "integer", "lower", - "space", "true", "upper", - "wordchar", (char *) NULL + "alnum", "alpha", "ascii", "control", + "boolean", "digit", "double", "false", + "graph", "integer", "lower", "print", + "punct", "space", "true", "upper", + "wordchar", "xdigit", (char *) NULL }; enum isOptions { - STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, - STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, - STR_IS_FALSE, STR_IS_INT, STR_IS_LOWER, - STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER, - STR_IS_WORD + STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL, + STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE, + STR_IS_GRAPH, STR_IS_INT, STR_IS_LOWER, STR_IS_PRINT, + STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER, + STR_IS_WORD, STR_IS_XDIGIT }; if (objc < 4 || objc > 7) { @@ -1101,6 +1143,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) result = 0; } break; + case STR_IS_CONTROL: + chcomp = Tcl_UniCharIsControl; + break; case STR_IS_DIGIT: chcomp = Tcl_UniCharIsDigit; break; @@ -1162,6 +1207,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } break; } + case STR_IS_GRAPH: + chcomp = Tcl_UniCharIsGraph; + break; case STR_IS_INT: { char *stop; @@ -1170,7 +1218,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) break; } /* - * Like STR_IS_DOUBLE, but we don't use strtoul. + * Like STR_IS_DOUBLE, but we use strtoul. * Since Tcl_GetInt already failed, we set result to 0. */ result = 0; @@ -1204,6 +1252,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) case STR_IS_LOWER: chcomp = Tcl_UniCharIsLower; break; + case STR_IS_PRINT: + chcomp = Tcl_UniCharIsPrint; + break; + case STR_IS_PUNCT: + chcomp = Tcl_UniCharIsPunct; + break; case STR_IS_SPACE: chcomp = Tcl_UniCharIsSpace; break; @@ -1213,6 +1267,17 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) case STR_IS_WORD: chcomp = Tcl_UniCharIsWordChar; break; + case STR_IS_XDIGIT: { + for (; string1 < end; string1++, failat++) { + /* INTL: We assume unicode is bad for this class */ + if ((*((unsigned char *)string1) >= 0xC0) || + !isxdigit(*(unsigned char *)string1)) { + result = 0; + break; + } + } + break; + } } if (chcomp != NULL) { for (; string1 < end; string1 += length2, failat++) { @@ -1238,10 +1303,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } case STR_LAST: { register char *p; - int match; + int match, utflen, start; - if (objc != 4) { - goto badFirstLastArgs; + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, + "string1 string2 ?startIndex?"); + return TCL_ERROR; } /* @@ -1249,14 +1316,43 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) */ match = -1; + start = 0; + utflen = -1; string1 = Tcl_GetStringFromObj(objv[2], &length1); string2 = Tcl_GetStringFromObj(objv[3], &length2); + + if (objc == 5) { + /* + * If a startIndex is specified, we will need to restrict + * the string range to that char index in the string + */ + utflen = Tcl_NumUtfChars(string2, length2); + if (TclGetIntForIndex(interp, objv[4], utflen-1, + &start) != TCL_OK) { + return TCL_ERROR; + } + if (start < 0) { + goto str_last_done; + } else if (start < utflen) { + if (length2 == utflen) { + /* no unicode chars */ + p = string2 + start + 1 - length1; + } else { + p = Tcl_UtfAtIndex(string2, start+1) - length1; + } + } else { + p = string2 + length2 - length1; + } + } else { + p = string2 + length2 - length1; + } + if (length1 > 0) { - for (p = string2 + length2 - length1; p >= string2; p--) { + for (; p >= string2; p--) { /* * Scan backwards to find the first character. */ - + while ((p != string2) && (*p != *string1)) { p--; } @@ -1271,9 +1367,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) * Compute the character index of the matching string by counting * the number of characters before the match. */ - + str_last_done: if (match != -1) { - match = Tcl_NumUtfChars(string2, match); + if ((objc == 4) || (length2 != utflen)) { + /* only check when we've got unicode chars */ + match = Tcl_NumUtfChars(string2, match); + } } Tcl_SetIntObj(resultPtr, match); break; @@ -1408,14 +1507,30 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) break; } case STR_MATCH: { - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "pattern string"); + int nocase = 0; + + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? pattern string"); return TCL_ERROR; } - string1 = Tcl_GetStringFromObj(objv[2], &length1); - string2 = Tcl_GetStringFromObj(objv[3], &length2); - Tcl_SetBooleanObj(resultPtr, Tcl_StringMatch(string2, string1)); + if (objc == 5) { + string2 = Tcl_GetStringFromObj(objv[2], &length2); + if ((length2 > 1) && + strncmp(string2, "-nocase", (size_t) length2) == 0) { + nocase = 1; + } else { + Tcl_AppendStringsToObj(resultPtr, "bad option \"", + string2, "\": must be -nocase", + (char *) NULL); + return TCL_ERROR; + } + } + + Tcl_SetBooleanObj(resultPtr, + Tcl_StringCaseMatch(Tcl_GetString(objv[objc-1]), + Tcl_GetString(objv[objc-2]), + nocase)); break; } case STR_RANGE: { @@ -1427,20 +1542,20 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } string1 = Tcl_GetStringFromObj(objv[2], &length1); - length1 = Tcl_NumUtfChars(string1, length1); - if (TclGetIntForIndex(interp, objv[3], length1 - 1, + length1 = Tcl_NumUtfChars(string1, length1) - 1; + if (TclGetIntForIndex(interp, objv[3], length1, &first) != TCL_OK) { return TCL_ERROR; } - if (TclGetIntForIndex(interp, objv[4], length1 - 1, + if (TclGetIntForIndex(interp, objv[4], length1, &last) != TCL_OK) { return TCL_ERROR; } if (first < 0) { first = 0; } - if (last >= length1 - 1) { - last = length1 - 1; + if (last >= length1) { + last = length1; } if (last >= first) { char *start, *end; @@ -1474,8 +1589,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) case STR_REPLACE: { int first, last; - if (!(objc == 5 || objc == 6)) { - Tcl_WrongNumArgs(interp, 2, objv, "string first last ?string?"); + if (objc < 5 || objc > 6) { + Tcl_WrongNumArgs(interp, 2, objv, + "string first last ?string?"); return TCL_ERROR; } |