summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorstanton <stanton>1999-05-20 00:03:35 (GMT)
committerstanton <stanton>1999-05-20 00:03:35 (GMT)
commit2ebefcfc6f3566b43ad1c114fcea20fd2d1422e6 (patch)
treeed39f2ccdc495d26a90630aee5ad4881d40b8b3b
parentb2cc5aedb9bd61f39eb8e0f4f6d9c282f523bfc3 (diff)
downloadtcl-2ebefcfc6f3566b43ad1c114fcea20fd2d1422e6.zip
tcl-2ebefcfc6f3566b43ad1c114fcea20fd2d1422e6.tar.gz
tcl-2ebefcfc6f3566b43ad1c114fcea20fd2d1422e6.tar.bz2
Merged in various changes submitted by Jeff Hobbs:
* generic/tcl.decls: * generic/tclUtf.c: Added Tcl_UniCharIs* functions for control, graph, print, and punct classes. * generic/tclUtil.c: * doc/StrMatch.3: Added Tcl_StringCaseMatch() implementation to support case-insensitive globbing. * doc/string.n: * unix/mkLinks: * tests/string.test: * generic/tclCmdMZ.c: Added additional character class tests, added -nocase switch to "string match", changed string first/last to use offsets.
-rw-r--r--doc/StrMatch.322
-rw-r--r--doc/string.n159
-rw-r--r--generic/tcl.decls17
-rw-r--r--generic/tclCmdMZ.c212
-rw-r--r--generic/tclDecls.h38
-rw-r--r--generic/tclStubInit.c7
-rw-r--r--generic/tclUtf.c108
-rw-r--r--generic/tclUtil.c163
-rw-r--r--tests/string.test164
-rw-r--r--unix/mkLinks4
10 files changed, 755 insertions, 139 deletions
diff --git a/doc/StrMatch.3 b/doc/StrMatch.3
index 09cd6df..8b55deb 100644
--- a/doc/StrMatch.3
+++ b/doc/StrMatch.3
@@ -5,25 +5,34 @@
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: StrMatch.3,v 1.2 1998/09/14 18:39:50 stanton Exp $
+'\" RCS: @(#) $Id: StrMatch.3,v 1.2.8.1 1999/05/20 00:03:35 stanton Exp $
'\"
.so man.macros
-.TH Tcl_StringMatch 3 "" Tcl "Tcl Library Procedures"
+.TH Tcl_StringMatch 3 8.1 Tcl "Tcl Library Procedures"
.BS
.SH NAME
-Tcl_StringMatch \- test whether a string matches a pattern
+Tcl_StringMatch, Tcl_StringCaseMatch \- test whether a string matches a pattern
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
.sp
int
\fBTcl_StringMatch\fR(\fIstring\fR, \fIpattern\fR)
+.VS 8.1
+.sp
+\fBTcl_StringCaseMatch\fR(\fIstring, pattern, nocase\fR)
+.VE 8.1
.SH ARGUMENTS
.AP char *string in
String to test.
.AP char *pattern in
Pattern to match against string. May contain special
characters from the set *?\e[].
+.VS 8.1
+.AP int nocase in
+Specifies whether the match should be done case-sensitive (0) or
+case-insensitive (1).
+.VE 8.1
.BE
.SH DESCRIPTION
@@ -34,6 +43,13 @@ a given pattern. If it does, then \fBTcl_StringMatch\fR returns
used for matching is the same algorithm used in the ``string match''
Tcl command and is similar to the algorithm used by the C-shell
for file name matching; see the Tcl manual entry for details.
+.VS 8.1
+.PP
+In \fBTcl_StringCaseMatch\fR, the algorithm is the same, but you have
+the option to make the matching case-insensitive. If you choose this
+(by passing \fBnocase\fR as 1), then the string and pattern are
+essentially matched in the lower case.
+.VE 8.1
.SH KEYWORDS
match, pattern, string
diff --git a/doc/string.n b/doc/string.n
index 07ab0a7..97137a7 100644
--- a/doc/string.n
+++ b/doc/string.n
@@ -5,7 +5,7 @@
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: string.n,v 1.9 1999/05/06 22:50:02 stanton Exp $
+'\" RCS: @(#) $Id: string.n,v 1.9.2.1 1999/05/20 00:03:35 stanton Exp $
'\"
.so man.macros
.TH string n 8.1 Tcl "Tcl Built-In Commands"
@@ -25,35 +25,55 @@ The legal \fIoption\fRs (which may be abbreviated) are:
.TP
\fBstring bytelength \fIstring\fR
Returns a decimal string giving the number of bytes used to represent
-\fIstring\fR in memory. Because UTF-8 uses one to three bytes to
+\fIstring\fR in memory. Because UTF\-8 uses one to three bytes to
represent Unicode characters, the byte length will not be the same as
the character length in general. The cases where a script cares about
the byte length are rare. In almost all cases, you should use the
-\fBstring length\fB operation. Refer to the \fBTcl_NumUtfChars\fR
-manual entry for more details on the UTF-8 representation.
+\fBstring length\fR operation. Refer to the \fBTcl_NumUtfChars\fR
+manual entry for more details on the UTF\-8 representation.
.TP
-\fBstring compare ?\fB-nocase\fR? ?\fB-length int\fR? \fIstring1 string2\fR
+\fBstring compare ?\fB\-nocase\fR? ?\fB\-length int\fR? \fIstring1 string2\fR
.VE 8.1
Perform a character-by-character comparison of strings \fIstring1\fR and
-\fIstring2\fR in the same way as the C \fBstrcmp\fR procedure. Return
+\fIstring2\fR. Returns
\-1, 0, or 1, depending on whether \fIstring1\fR is lexicographically
less than, equal to, or greater than \fIstring2\fR.
.VS 8.1
-If \fB-length\fR is specified, it works like C \fBstrncmp\fR,
-comparing only to the specified length. If \fB-length\fR is negative,
-it is ignored. If \fB-nocase\fR is specified, then the strings are
+If \fB\-length\fR is specified, then only the first \fIlength\fR characters
+are used in the comparison. If \fB\-length\fR is negative, it is
+ignored. If \fB\-nocase\fR is specified, then the strings are
compared in a case-insensitive manner.
.TP
-\fBstring equal ?\fB-nocase\fR? ?\fB-length int\fR? \fIstring1 string2\fR
-.VE 8.1
-Like the \fBcompare\fR method, but returns 1 when the strings
-are equal, or 0 when not.
+\fBstring equal ?\fB\-nocase\fR? ?\fB-length int\fR? \fIstring1
+string2\fR Perform a character-by-character comparison of strings
+\fIstring1\fR and \fIstring2\fR. Returns 1 if \fIstring1\fR and
+\fIstring2\fR are identical, or 0 when not. If \fB\-length\fR is
+specified, then only the first \fIlength\fR characters are used in the
+comparison. If \fB\-length\fR is negative, it is ignored. If
+\fB\-nocase\fR is specified, then the strings are compared in a
+case-insensitive manner.
.TP
-\fBstring first \fIstring1 string2\fR
+\fBstring first \fIstring1 string2\fR ?\fIstartIndex\fR?
+.VE 8.1
Search \fIstring2\fR for a sequence of characters that exactly match
the characters in \fIstring1\fR. If found, return the index of the
first character in the first such match within \fIstring2\fR. If not
found, return \-1.
+.VS 8.1
+If \fIstartIndex\fR is specified (in any of the forms accepted by the
+\fBindex\fR method), then the search is constrained to start with the
+character in \fIstring2\fR specified by the index. For example,
+.RS
+.CS
+\fBstring first a 0a23456789abcdef 5\fR
+.CE
+will return \fB10\fR, but
+.CS
+\fBstring first a 0123456789abcdef 11\fR
+.CE
+will return \fB\-1\fR.
+.RE
+.VE 8.1
.TP
\fBstring index \fIstring charIndex\fR
Returns the \fIcharIndex\fR'th character of the \fIstring\fR
@@ -67,9 +87,9 @@ follows:
The char specified at this integral index
.IP \fBend\fR 10
The last char of the string.
-.IP \fBend-\fIinteger\fR 10
+.IP \fBend\-\fIinteger\fR 10
The last char of the string minus the specified integer
-offset (e.g. \fBend-1\fR would refer to the "c" in "abcd").
+offset (e.g. \fBend\-1\fR would refer to the "c" in "abcd").
.PP
.VE 8.1
If \fIcharIndex\fR is less than 0 or greater than
@@ -78,14 +98,15 @@ returned.
.RE
.VS 8.1
.TP
-\fBstring is \fIclass\fR ?\fB-strict\fR? ?\fB-failindex \fIvarname\fR? \fIstring\fR
-See if \fIstring\fR is a valid form of the specified class. If
-\fB-strict\fR is specified, then an empty string returns 0, otherwise and
-empty string will return 1 on any class. If \fB-failindex\fR is specified,
-then if the function returns 0, the index in the string where the class was
-no longer valid will be stored in the variable named \fIvarname\fR. The
-\fIvarname\fR will not be set if the function returns 1. The following
-class definitions are allowed (the class name can be abbreviated):
+\fBstring is \fIclass\fR ?\fB\-strict\fR? ?\fB\-failindex \fIvarname\fR? \fIstring\fR
+Returns 1 if \fIstring\fR is a valid member of the specified character
+class, otherwise returns 0. If \fB\-strict\fR is specified, then an
+empty string returns 0, otherwise and empty string will return 1 on
+any class. If \fB\-failindex\fR is specified, then if the function
+returns 0, the index in the string where the class was no longer valid
+will be stored in the variable named \fIvarname\fR. The \fIvarname\fR
+will not be set if the function returns 1. The following character classes
+are recognized (the class name can be abbreviated):
.RS
.IP \fBalnum\fR 10
Any Unicode alphabet or digit character.
@@ -93,43 +114,68 @@ Any Unicode alphabet or digit character.
Any Unicode alphabet character.
.IP \fBascii\fR 10
Any character with a value less than \\u0080 (those that
-are in the 7-bit ascii range).
+are in the 7\-bit ascii range).
.IP \fBboolean\fR 10
-Any of the forms allowed to Tcl_GetBoolean.
+Any of the forms allowed to \fBTcl_GetBoolean\fR.
+.IP \fBcontrol\fR 10
+Any Unicode control character.
.IP \fBdigit\fR 10
Any Unicode digit character.
.IP \fBdouble\fR 10
Any of the valid forms for a double in Tcl, with optional surrounding
whitespace. In case of under/overflow in the value, 0 is returned
-and the \fIvarname\fR will contain -1.
+and the \fIvarname\fR will contain \-1.
.IP \fBfalse\fR 10
-Any of the forms allowed to Tcl_GetBoolean where the value is false.
+Any of the forms allowed to \fBTcl_GetBoolean\fR where the value is false.
+.IP \fBgraph\fR 10
+Any Unicode printing character, except space.
.IP \fBinteger\fR 10
Any of the valid forms for an integer in Tcl, with optional surrounding
whitespace. In case of under/overflow in the value, 0 is returned
-and the \fIvarname\fR will contain -1.
+and the \fIvarname\fR will contain \-1.
.IP \fBlower\fR 10
Any Unicode lower case alphabet character.
+.IP \fBprint\fR 10
+Any Unicode printing character, including space.
+.IP \fBpunct\fR 10
+Any Unicode printing character, except space or where \fBalnum\fR is true.
.IP \fBspace\fR 10
Any Unicode space character.
.IP \fBtrue\fR 10
-Any of the forms allowed to Tcl_GetBoolean where the value is true.
+Any of the forms allowed to \fBTcl_GetBoolean\fR where the value is true.
.IP \fBupper\fR 10
Any upper case alphabet character in the Unicode character set.
.IP \fBwordchar\fR 10
Any Unicode word character. That is any alphanumeric character,
-and any Unicode connector punctuation characters (ie: underscore).
+and any Unicode connector punctuation characters (e.g. underscore).
+.IP \fBxdigit\fR 10
+Any hexadecimal digit character ([0\-9A\-Fa\-f]).
.RE
In the case of \fBboolean\fR, \fBtrue\fR and \fBfalse\fR, if the
function will return 0, the \fIvarname\fR will always be set to 0,
due to the varied nature of a valid boolean value.
-.VE 8.1
.TP
-\fBstring last \fIstring1 string2\fR
+\fBstring last \fIstring1 string2\fR ?\fIstartIndex\fR?
+.VE 8.1
Search \fIstring2\fR for a sequence of characters that exactly match
the characters in \fIstring1\fR. If found, return the index of the
first character in the last such match within \fIstring2\fR. If there
is no match, then return \-1.
+.VS 8.1
+If \fIstartIndex\fR is specified (in any of the forms accepted by the
+\fBindex\fR method), then only the characters in \fIstring2\fR at or before the
+specified \fIstartIndex\fR will be considered by the search. For example,
+.RS
+.CS
+\fBstring last a 0a23456789abcdef 15\fR
+.CE
+will return \fB10\fR, but
+.CS
+\fBstring last a 0a23456789abcdef 9\fR
+.CE
+will return \fB1\fR.
+.RE
+.VE 8.1
.TP
\fBstring length \fIstring\fR
Returns a decimal string giving the number of characters in
@@ -137,29 +183,33 @@ Returns a decimal string giving the number of characters in
number of bytes used to store the string.
.VS 8.1
.TP
-\fBstring map ?\fB-nocase\fR? \fIcharMap string\fR
+\fBstring map\fR ?\fB\-nocase\fR? \fIcharMap string\fR
Replaces characters in \fIstring\fR based on the key-value pairs in
-\fIcharMap\fR. \fIcharMap\fR is a list of key value key value ... as
-in the form returned by \fBarray get\fR. Each instance of a key in
-the string will be replace with its corresponding value. If
-\fB-nocase\fR is specified, then matching is done without regard to
-case differences. Both key and value may be multiple characters. This
-is done in an ordered manner, so the key appearing first in the list
-will be checked first, and so on. \fIstring\fR is only iterated over
-once, so earlier key replacements will have no affect for later key
-matches. For example,
+\fIcharMap\fR. \fIcharMap\fR is a list of \fIkey value key value\fR
+... as in the form returned by \fBarray get\fR. Each instance of a
+key in the string will be replaced with its corresponding value. If
+\fB\-nocase\fR is specified, then matching is done without regard to
+case differences. Both \fIkey\fR and \fIvalue\fR may be multiple
+characters. Replacement is done in an ordered manner, so the key appearing
+first in the list will be checked first, and so on. \fIstring\fR is
+only iterated over once, so earlier key replacements will have no
+affect for later key matches. For example,
.RS
.CS
\fBstring map {abc 1 ab 2 a 3 1 0} 1abcaababcabababc\fR
.CE
will return the string \fB01321221\fR.
.RE
-.VE 8.1
.TP
-\fBstring match \fIpattern\fR \fIstring\fR
+\fBstring match ?\fB\-nocase\fR? \fIpattern\fR \fIstring\fR
+.VE 8.1
See if \fIpattern\fR matches \fIstring\fR; return 1 if it does, 0
-if it doesn't. Matching is done in a fashion similar to that
-used by the C-shell. For the two strings to match, their contents
+if it doesn't.
+.VS 8.1
+If \fB\-nocase\fR is specified, then the pattern attempts to match
+against the string in a case insensitive manner.
+.VE 8.1
+For the two strings to match, their contents
must be identical except that the following special sequences
may appear in \fIpattern\fR:
.RS
@@ -173,6 +223,13 @@ Matches any character in the set given by \fIchars\fR. If a sequence
of the form
\fIx\fB\-\fIy\fR appears in \fIchars\fR, then any character
between \fIx\fR and \fIy\fR, inclusive, will match.
+.VS 8.1
+When used with \fB\-nocase\fR, the end points of the range are converted
+to lower case first. Whereas {[A\-z]} matches '_' when matching
+case-sensitively ('_' falls between the 'Z' and 'a'), with \fB\-nocase\fR
+this is considered like {[A\-Za\-z]} (and probably what was meant in the
+first place).
+.VE 8.1
.IP \fB\e\fIx\fR 10
Matches the single character \fIx\fR. This provides a way of
avoiding the special interpretation of the characters
@@ -196,12 +253,12 @@ it is treated as if it were \fBend\fR. If \fIfirst\fR is greater than
\fBstring repeat \fIstring count\fR
Returns \fIstring\fR repeated \fIcount\fR number of times.
.TP
-\fBstring replace \fIstring last\fR ?\fIstring\fR?
+\fBstring replace \fIstring first last\fR ?\fInewstring\fR?
Removes a range of consecutive characters from \fIstring\fR, starting
with the character whose index is \fIfirst\fR and ending with the
character whose index is \fIlast\fR. An index of 0 refers to the
-first character of the string. \fIfirst\fR and \fIlast\fR may be
-specified as for the \fBindex\fR method. If \fIstring\fR is
+first character of the string. \fIFirst\fR and \fIlast\fR may be
+specified as for the \fBindex\fR method. If \fInewstring\fR is
specified, then it is placed in the removed character range.
If \fIfirst\fR is less than zero then it is treated as if it were zero, and
if \fIlast\fR is greater than or equal to the length of the string then
@@ -276,4 +333,4 @@ single character other than these.
.VE 8.1
.SH KEYWORDS
-case conversion, compare, index, match, pattern, string, word
+case conversion, compare, index, match, pattern, string, word, equal, ctype
diff --git a/generic/tcl.decls b/generic/tcl.decls
index d77b076..106d065 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -10,7 +10,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: tcl.decls,v 1.12 1999/05/07 20:07:46 stanton Exp $
+# RCS: @(#) $Id: tcl.decls,v 1.12.2.1 1999/05/20 00:03:35 stanton Exp $
library tcl
@@ -1262,6 +1262,21 @@ declare 369 generic {
declare 370 generic {
int Tcl_UtfNcasecmp(CONST char *s1, CONST char *s2, size_t n)
}
+declare 371 generic {
+ int Tcl_StringCaseMatch(CONST char *str, CONST char *pattern, int nocase)
+}
+declare 372 generic {
+ int Tcl_UniCharIsControl(int ch)
+}
+declare 373 generic {
+ int Tcl_UniCharIsGraph(int ch)
+}
+declare 374 generic {
+ int Tcl_UniCharIsPrint(int ch)
+}
+declare 375 generic {
+ int Tcl_UniCharIsPunct(int ch)
+}
##############################################################################
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index dc5607c..f9adc2b 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.8 1999/05/06 22:50:03 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.8.2.1 1999/05/20 00:03:36 stanton Exp $
*/
#include "tclInt.h"
@@ -896,7 +896,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
if (nocase) {
match = Tcl_UtfNcasecmp(string1, string2,
- (unsigned)length);
+ (unsigned) length);
} else {
match = Tcl_UtfNcmp(string1, string2,
(unsigned) length);
@@ -912,7 +912,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
if ((enum options) index == STR_EQUAL) {
- Tcl_SetIntObj(resultPtr, (match) ? 0 : 1);
+ Tcl_SetBooleanObj(resultPtr, (match) ? 0 : 1);
} else {
Tcl_SetIntObj(resultPtr, ((match > 0) ? 1 :
(match < 0) ? -1 : 0));
@@ -921,21 +921,47 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
case STR_FIRST: {
register char *p, *end;
- int match;
+ int match, utflen, start;
- if (objc != 4) {
- badFirstLastArgs:
- Tcl_WrongNumArgs(interp, 2, objv, "string1 string2");
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "string1 string2 ?startIndex?");
return TCL_ERROR;
}
/*
* This algorithm fails on improperly formed UTF strings.
+ * We are searching string2 for the sequence string1.
*/
match = -1;
+ start = 0;
+ utflen = -1;
string1 = Tcl_GetStringFromObj(objv[2], &length1);
string2 = Tcl_GetStringFromObj(objv[3], &length2);
+
+ if (objc == 5) {
+ /*
+ * If a startIndex is specified, we will need to fast forward
+ * to that point in the string before we think about a match
+ */
+ utflen = Tcl_NumUtfChars(string2, length2);
+ if (TclGetIntForIndex(interp, objv[4], utflen-1,
+ &start) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (start >= utflen) {
+ goto str_first_done;
+ } else if (start > 0) {
+ if (length2 == utflen) {
+ /* no unicode chars */
+ string2 += start;
+ } else {
+ string2 = Tcl_UtfAtIndex(string2, start);
+ }
+ }
+ }
+
if (length1 > 0) {
end = string2 + length2 - length1 + 1;
for (p = string2; p < end; p++) {
@@ -955,19 +981,25 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
/*
- * Compute the character index of the matching string by counting
- * the number of characters before the match.
+ * Compute the character index of the matching string by
+ * counting the number of characters before the match.
*/
-
+ str_first_done:
if (match != -1) {
- match = Tcl_NumUtfChars(string2, match);
+ if (objc == 4) {
+ match = Tcl_NumUtfChars(string2, match);
+ } else if (length2 == utflen) {
+ /* no unicode chars */
+ match += start;
+ } else {
+ match = start + Tcl_NumUtfChars(string2, match);
+ }
}
Tcl_SetIntObj(resultPtr, match);
break;
}
case STR_INDEX: {
int index;
- char buf[TCL_UTF_MAX];
if (objc != 4) {
Tcl_WrongNumArgs(interp, 2, objv, "string charIndex");
@@ -977,15 +1009,25 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
/*
* establish what 'end' really means
*/
- length2 = Tcl_NumUtfChars(string1, length1) - 1;
- if (TclGetIntForIndex(interp, objv[3], length2,
+ length2 = Tcl_NumUtfChars(string1, length1);
+ if (TclGetIntForIndex(interp, objv[3], length2 - 1,
&index) != TCL_OK) {
return TCL_ERROR;
}
- if ((index >= 0) && (index < length1)) {
- length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1,
- index), buf);
- Tcl_SetStringObj(resultPtr, buf, length2);
+ /*
+ * index must be between 0 and the UTF length to be valid
+ */
+ if ((index >= 0) && (index < length2)) {
+ if (length1 == length2) {
+ /* no unicode chars */
+ Tcl_SetStringObj(resultPtr, string1+index, 1);
+ } else {
+ char buf[TCL_UTF_MAX];
+
+ length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1,
+ index), buf);
+ Tcl_SetStringObj(resultPtr, buf, length2);
+ }
}
break;
}
@@ -997,18 +1039,18 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
Tcl_Obj *objPtr, *failVarObj = NULL;
static char *isOptions[] = {
- "alnum", "alpha", "ascii",
- "boolean", "digit", "double",
- "false", "integer", "lower",
- "space", "true", "upper",
- "wordchar", (char *) NULL
+ "alnum", "alpha", "ascii", "control",
+ "boolean", "digit", "double", "false",
+ "graph", "integer", "lower", "print",
+ "punct", "space", "true", "upper",
+ "wordchar", "xdigit", (char *) NULL
};
enum isOptions {
- STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII,
- STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE,
- STR_IS_FALSE, STR_IS_INT, STR_IS_LOWER,
- STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER,
- STR_IS_WORD
+ STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL,
+ STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE,
+ STR_IS_GRAPH, STR_IS_INT, STR_IS_LOWER, STR_IS_PRINT,
+ STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER,
+ STR_IS_WORD, STR_IS_XDIGIT
};
if (objc < 4 || objc > 7) {
@@ -1101,6 +1143,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
result = 0;
}
break;
+ case STR_IS_CONTROL:
+ chcomp = Tcl_UniCharIsControl;
+ break;
case STR_IS_DIGIT:
chcomp = Tcl_UniCharIsDigit;
break;
@@ -1162,6 +1207,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
break;
}
+ case STR_IS_GRAPH:
+ chcomp = Tcl_UniCharIsGraph;
+ break;
case STR_IS_INT: {
char *stop;
@@ -1170,7 +1218,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
break;
}
/*
- * Like STR_IS_DOUBLE, but we don't use strtoul.
+ * Like STR_IS_DOUBLE, but we use strtoul.
* Since Tcl_GetInt already failed, we set result to 0.
*/
result = 0;
@@ -1204,6 +1252,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
case STR_IS_LOWER:
chcomp = Tcl_UniCharIsLower;
break;
+ case STR_IS_PRINT:
+ chcomp = Tcl_UniCharIsPrint;
+ break;
+ case STR_IS_PUNCT:
+ chcomp = Tcl_UniCharIsPunct;
+ break;
case STR_IS_SPACE:
chcomp = Tcl_UniCharIsSpace;
break;
@@ -1213,6 +1267,17 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
case STR_IS_WORD:
chcomp = Tcl_UniCharIsWordChar;
break;
+ case STR_IS_XDIGIT: {
+ for (; string1 < end; string1++, failat++) {
+ /* INTL: We assume unicode is bad for this class */
+ if ((*((unsigned char *)string1) >= 0xC0) ||
+ !isxdigit(*(unsigned char *)string1)) {
+ result = 0;
+ break;
+ }
+ }
+ break;
+ }
}
if (chcomp != NULL) {
for (; string1 < end; string1 += length2, failat++) {
@@ -1238,10 +1303,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
case STR_LAST: {
register char *p;
- int match;
+ int match, utflen, start;
- if (objc != 4) {
- goto badFirstLastArgs;
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "string1 string2 ?startIndex?");
+ return TCL_ERROR;
}
/*
@@ -1249,14 +1316,43 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
*/
match = -1;
+ start = 0;
+ utflen = -1;
string1 = Tcl_GetStringFromObj(objv[2], &length1);
string2 = Tcl_GetStringFromObj(objv[3], &length2);
+
+ if (objc == 5) {
+ /*
+ * If a startIndex is specified, we will need to restrict
+ * the string range to that char index in the string
+ */
+ utflen = Tcl_NumUtfChars(string2, length2);
+ if (TclGetIntForIndex(interp, objv[4], utflen-1,
+ &start) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (start < 0) {
+ goto str_last_done;
+ } else if (start < utflen) {
+ if (length2 == utflen) {
+ /* no unicode chars */
+ p = string2 + start + 1 - length1;
+ } else {
+ p = Tcl_UtfAtIndex(string2, start+1) - length1;
+ }
+ } else {
+ p = string2 + length2 - length1;
+ }
+ } else {
+ p = string2 + length2 - length1;
+ }
+
if (length1 > 0) {
- for (p = string2 + length2 - length1; p >= string2; p--) {
+ for (; p >= string2; p--) {
/*
* Scan backwards to find the first character.
*/
-
+
while ((p != string2) && (*p != *string1)) {
p--;
}
@@ -1271,9 +1367,12 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
* Compute the character index of the matching string by counting
* the number of characters before the match.
*/
-
+ str_last_done:
if (match != -1) {
- match = Tcl_NumUtfChars(string2, match);
+ if ((objc == 4) || (length2 != utflen)) {
+ /* only check when we've got unicode chars */
+ match = Tcl_NumUtfChars(string2, match);
+ }
}
Tcl_SetIntObj(resultPtr, match);
break;
@@ -1408,14 +1507,30 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
break;
}
case STR_MATCH: {
- if (objc != 4) {
- Tcl_WrongNumArgs(interp, 2, objv, "pattern string");
+ int nocase = 0;
+
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? pattern string");
return TCL_ERROR;
}
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
- string2 = Tcl_GetStringFromObj(objv[3], &length2);
- Tcl_SetBooleanObj(resultPtr, Tcl_StringMatch(string2, string1));
+ if (objc == 5) {
+ string2 = Tcl_GetStringFromObj(objv[2], &length2);
+ if ((length2 > 1) &&
+ strncmp(string2, "-nocase", (size_t) length2) == 0) {
+ nocase = 1;
+ } else {
+ Tcl_AppendStringsToObj(resultPtr, "bad option \"",
+ string2, "\": must be -nocase",
+ (char *) NULL);
+ return TCL_ERROR;
+ }
+ }
+
+ Tcl_SetBooleanObj(resultPtr,
+ Tcl_StringCaseMatch(Tcl_GetString(objv[objc-1]),
+ Tcl_GetString(objv[objc-2]),
+ nocase));
break;
}
case STR_RANGE: {
@@ -1427,20 +1542,20 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
string1 = Tcl_GetStringFromObj(objv[2], &length1);
- length1 = Tcl_NumUtfChars(string1, length1);
- if (TclGetIntForIndex(interp, objv[3], length1 - 1,
+ length1 = Tcl_NumUtfChars(string1, length1) - 1;
+ if (TclGetIntForIndex(interp, objv[3], length1,
&first) != TCL_OK) {
return TCL_ERROR;
}
- if (TclGetIntForIndex(interp, objv[4], length1 - 1,
+ if (TclGetIntForIndex(interp, objv[4], length1,
&last) != TCL_OK) {
return TCL_ERROR;
}
if (first < 0) {
first = 0;
}
- if (last >= length1 - 1) {
- last = length1 - 1;
+ if (last >= length1) {
+ last = length1;
}
if (last >= first) {
char *start, *end;
@@ -1474,8 +1589,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
case STR_REPLACE: {
int first, last;
- if (!(objc == 5 || objc == 6)) {
- Tcl_WrongNumArgs(interp, 2, objv, "string first last ?string?");
+ if (objc < 5 || objc > 6) {
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "string first last ?string?");
return TCL_ERROR;
}
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index ffb080d..c65fa82 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclDecls.h,v 1.12 1999/05/07 20:07:47 stanton Exp $
+ * RCS: @(#) $Id: tclDecls.h,v 1.12.2.1 1999/05/20 00:03:36 stanton Exp $
*/
#ifndef _TCLDECLS
@@ -1129,6 +1129,17 @@ EXTERN int Tcl_UtfNcmp _ANSI_ARGS_((CONST char * s1,
/* 370 */
EXTERN int Tcl_UtfNcasecmp _ANSI_ARGS_((CONST char * s1,
CONST char * s2, size_t n));
+/* 371 */
+EXTERN int Tcl_StringCaseMatch _ANSI_ARGS_((CONST char * str,
+ CONST char * pattern, int nocase));
+/* 372 */
+EXTERN int Tcl_UniCharIsControl _ANSI_ARGS_((int ch));
+/* 373 */
+EXTERN int Tcl_UniCharIsGraph _ANSI_ARGS_((int ch));
+/* 374 */
+EXTERN int Tcl_UniCharIsPrint _ANSI_ARGS_((int ch));
+/* 375 */
+EXTERN int Tcl_UniCharIsPunct _ANSI_ARGS_((int ch));
typedef struct TclStubHooks {
struct TclPlatStubs *tclPlatStubs;
@@ -1535,6 +1546,11 @@ typedef struct TclStubs {
int (*tcl_Stat) _ANSI_ARGS_((CONST char * path, struct stat * bufPtr)); /* 368 */
int (*tcl_UtfNcmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 369 */
int (*tcl_UtfNcasecmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 370 */
+ int (*tcl_StringCaseMatch) _ANSI_ARGS_((CONST char * str, CONST char * pattern, int nocase)); /* 371 */
+ int (*tcl_UniCharIsControl) _ANSI_ARGS_((int ch)); /* 372 */
+ int (*tcl_UniCharIsGraph) _ANSI_ARGS_((int ch)); /* 373 */
+ int (*tcl_UniCharIsPrint) _ANSI_ARGS_((int ch)); /* 374 */
+ int (*tcl_UniCharIsPunct) _ANSI_ARGS_((int ch)); /* 375 */
} TclStubs;
#ifdef __cplusplus
@@ -3023,6 +3039,26 @@ extern TclStubs *tclStubsPtr;
#define Tcl_UtfNcasecmp \
(tclStubsPtr->tcl_UtfNcasecmp) /* 370 */
#endif
+#ifndef Tcl_StringCaseMatch
+#define Tcl_StringCaseMatch \
+ (tclStubsPtr->tcl_StringCaseMatch) /* 371 */
+#endif
+#ifndef Tcl_UniCharIsControl
+#define Tcl_UniCharIsControl \
+ (tclStubsPtr->tcl_UniCharIsControl) /* 372 */
+#endif
+#ifndef Tcl_UniCharIsGraph
+#define Tcl_UniCharIsGraph \
+ (tclStubsPtr->tcl_UniCharIsGraph) /* 373 */
+#endif
+#ifndef Tcl_UniCharIsPrint
+#define Tcl_UniCharIsPrint \
+ (tclStubsPtr->tcl_UniCharIsPrint) /* 374 */
+#endif
+#ifndef Tcl_UniCharIsPunct
+#define Tcl_UniCharIsPunct \
+ (tclStubsPtr->tcl_UniCharIsPunct) /* 375 */
+#endif
#endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 632f4a8..74b0082 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStubInit.c,v 1.13 1999/05/06 18:46:25 stanton Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.13.2.1 1999/05/20 00:03:37 stanton Exp $
*/
#include "tclInt.h"
@@ -686,6 +686,11 @@ TclStubs tclStubs = {
Tcl_Stat, /* 368 */
Tcl_UtfNcmp, /* 369 */
Tcl_UtfNcasecmp, /* 370 */
+ Tcl_StringCaseMatch, /* 371 */
+ Tcl_UniCharIsControl, /* 372 */
+ Tcl_UniCharIsGraph, /* 373 */
+ Tcl_UniCharIsPrint, /* 374 */
+ Tcl_UniCharIsPunct, /* 375 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 9881f7d..04074a1 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtf.c,v 1.5 1999/05/06 18:46:25 stanton Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.5.2.1 1999/05/20 00:03:38 stanton Exp $
*/
#include "tclInt.h"
@@ -35,6 +35,16 @@
#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION)
+#define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | \
+ (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \
+ (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \
+ (1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \
+ (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
+ (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
+ (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \
+ (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \
+ (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL))
+
/*
* Unicode characters less than this value are represented by themselves
* in UTF-8 strings.
@@ -1342,6 +1352,29 @@ Tcl_UniCharIsAlpha(ch)
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsControl --
+ *
+ * Test if a character is a Unicode control character.
+ *
+ * Results:
+ * Returns non-zero if character is a control.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsControl(ch)
+ int ch; /* Unicode character to test. */
+{
+ return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsDigit --
*
* Test if a character is a numeric Unicode character.
@@ -1366,6 +1399,30 @@ Tcl_UniCharIsDigit(ch)
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsGraph --
+ *
+ * Test if a character is any Unicode print character except space.
+ *
+ * Results:
+ * Returns non-zero if character is printable, but not space.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsGraph(ch)
+ int ch; /* Unicode character to test. */
+{
+ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+ return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' '));
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsLower --
*
* Test if a character is a lowercase Unicode character.
@@ -1389,6 +1446,55 @@ Tcl_UniCharIsLower(ch)
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsPrint --
+ *
+ * Test if a character is a Unicode print character.
+ *
+ * Results:
+ * Returns non-zero if character is printable.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsPrint(ch)
+ int ch; /* Unicode character to test. */
+{
+ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+ return ((PRINT_BITS >> category) & 1);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharIsPunct --
+ *
+ * Test if for any printing char that is neither space or an alnum.
+ *
+ * Results:
+ * Returns non-zero if character is punct.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsPunct(ch)
+ int ch; /* Unicode character to test. */
+{
+ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+ return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' ')
+ && !(((ALPHA_BITS | DIGIT_BITS) >> category) & 1));
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsSpace --
*
* Test if a character is a whitespace Unicode character.
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index fa4c22f..d3a915e 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtil.c,v 1.11 1999/05/06 19:21:11 stanton Exp $
+ * RCS: @(#) $Id: tclUtil.c,v 1.11.2.1 1999/05/20 00:03:38 stanton Exp $
*/
#include "tclInt.h"
@@ -1234,6 +1234,167 @@ Tcl_StringMatch(string, pattern)
/*
*----------------------------------------------------------------------
*
+ * Tcl_StringCaseMatch --
+ *
+ * See if a particular string matches a particular pattern.
+ * Allows case insensitivity.
+ *
+ * Results:
+ * The return value is 1 if string matches pattern, and
+ * 0 otherwise. The matching operation permits the following
+ * special characters in the pattern: *?\[] (see the manual
+ * entry for details on what these mean).
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_StringCaseMatch(string, pattern, nocase)
+ CONST char *string; /* String. */
+ CONST char *pattern; /* Pattern, which may contain special
+ * characters. */
+ int nocase; /* 0 for case sensitive, 1 for insensitive */
+{
+ int p, s;
+ CONST char *pstart = pattern;
+ Tcl_UniChar ch1, ch2;
+
+ while (1) {
+ p = *pattern;
+ s = *string;
+
+ /*
+ * See if we're at the end of both the pattern and the string. If
+ * so, we succeeded. If we're at the end of the pattern but not at
+ * the end of the string, we failed.
+ */
+
+ if (p == '\0') {
+ return (s == '\0');
+ }
+ if ((s == '\0') && (p != '*')) {
+ return 0;
+ }
+
+ /* Check for a "*" as the next pattern character. It matches
+ * any substring. We handle this by calling ourselves
+ * recursively for each postfix of string, until either we
+ * match or we reach the end of the string.
+ */
+
+ if (p == '*') {
+ pattern++;
+ if (*pattern == '\0') {
+ return 1;
+ }
+ while (1) {
+ if (Tcl_StringCaseMatch(string, pattern, nocase)) {
+ return 1;
+ }
+ if (*string == '\0') {
+ return 0;
+ }
+ string++;
+ }
+ }
+
+ /* Check for a "?" as the next pattern character. It matches
+ * any single character.
+ */
+
+ if (p == '?') {
+ pattern++;
+ string += Tcl_UtfToUniChar(string, &ch1);
+ continue;
+ }
+
+ /* Check for a "[" as the next pattern character. It is followed
+ * by a list of characters that are acceptable, or by a range
+ * (two characters separated by "-").
+ */
+
+ if (p == '[') {
+ Tcl_UniChar startChar, endChar;
+
+ pattern++;
+ string += Tcl_UtfToUniChar(string, &ch1);
+ if (nocase) {
+ ch1 = Tcl_UniCharToLower(ch1);
+ }
+ while (1) {
+ if ((*pattern == ']') || (*pattern == '\0')) {
+ return 0;
+ }
+ pattern += Tcl_UtfToUniChar(pattern, &startChar);
+ if (nocase) {
+ startChar = Tcl_UniCharToLower(startChar);
+ }
+ if (*pattern == '-') {
+ pattern++;
+ if (*pattern == '\0') {
+ return 0;
+ }
+ pattern += Tcl_UtfToUniChar(pattern, &endChar);
+ if (nocase) {
+ endChar = Tcl_UniCharToLower(endChar);
+ }
+ if (((startChar <= ch1) && (ch1 <= endChar))
+ || ((endChar <= ch1) && (ch1 <= startChar))) {
+ /*
+ * Matches ranges of form [a-z] or [z-a].
+ */
+
+ break;
+ }
+ } else if (startChar == ch1) {
+ break;
+ }
+ }
+ while (*pattern != ']') {
+ if (*pattern == '\0') {
+ pattern = Tcl_UtfPrev(pattern, pstart);
+ break;
+ }
+ pattern++;
+ }
+ pattern++;
+ continue;
+ }
+
+ /* If the next pattern character is '\', just strip off the '\'
+ * so we do exact matching on the character that follows.
+ */
+
+ if (p == '\\') {
+ pattern++;
+ p = *pattern;
+ if (p == '\0') {
+ return 0;
+ }
+ }
+
+ /* There's no special character. Just make sure that the next
+ * bytes of each string match.
+ */
+
+ string += Tcl_UtfToUniChar(string, &ch1);
+ pattern += Tcl_UtfToUniChar(pattern, &ch2);
+ if (nocase) {
+ if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
+ return 0;
+ }
+ } else if (ch1 != ch2) {
+ return 0;
+ }
+ }
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_DStringInit --
*
* Initializes a dynamic string, discarding any previous contents
diff --git a/tests/string.test b/tests/string.test
index 71f83be..5002639 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -11,7 +11,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: string.test,v 1.9 1999/05/06 22:50:04 stanton Exp $
+# RCS: @(#) $Id: string.test,v 1.9.2.1 1999/05/20 00:03:38 stanton Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
source [file join [pwd] [file dirname [info script]] defs.tcl]
@@ -129,33 +129,48 @@ test string-3.8 {string equal with length, unequal strings} {
string equal -length 2 abc abde
} 1
-test string-4.1 {string first} {
+test string-4.1 {string first, too few args} {
list [catch {string first a} msg] $msg
-} {1 {wrong # args: should be "string first string1 string2"}}
-test string-4.2 {string first} {
+} {1 {wrong # args: should be "string first string1 string2 ?startIndex?"}}
+test string-4.2 {string first, bad args} {
list [catch {string first a b c} msg] $msg
-} {1 {wrong # args: should be "string first string1 string2"}}
-test string-4.3 {string first} {
+} {1 {bad index "c": must be integer or end?-integer?}}
+test string-4.3 {string first, too many args} {
+ list [catch {string first a b 5 d} msg] $msg
+} {1 {wrong # args: should be "string first string1 string2 ?startIndex?"}}
+test string-4.4 {string first} {
string first bq abcdefgbcefgbqrs
} 12
-test string-4.4 {string first} {
+test string-4.5 {string first} {
string fir bcd abcdefgbcefgbqrs
} 1
-test string-4.5 {string first} {
+test string-4.6 {string first} {
string f b abcdefgbcefgbqrs
} 1
-test string-4.6 {string first} {
+test string-4.7 {string first} {
string first xxx x123xx345xxx789xxx012
} 9
-test string-4.7 {string first} {
+test string-4.8 {string first} {
string first "" x123xx345xxx789xxx012
} -1
-test string-4.8 {string first, unicode} {
+test string-4.9 {string first, unicode} {
string first x abc\u7266x
} 4
-test string-4.9 {string first, unicode} {
+test string-4.10 {string first, unicode} {
string first \u7266 abc\u7266x
} 3
+test string-4.11 {string first, start index} {
+ string first \u7266 abc\u7266x 3
+} 3
+test string-4.12 {string first, start index} {
+ string first \u7266 abc\u7266x 4
+} -1
+test string-4.13 {string first, start index} {
+ string first \u7266 abc\u7266x end-2
+} 3
+test string-4.14 {string first, start index} {
+ string first a abcabc end-4
+} 3
test string-5.1 {string index} {
list [catch {string index} msg] $msg
@@ -190,6 +205,9 @@ test string-5.10 {string index, unicode} {
test string-5.11 {string index, unicode} {
string index abc\u7266d 3
} \u7266
+test string-5.12 {string index, unicode over char length, under byte length} {
+ string index \334\374\334\374 6
+} {}
test string-6.1 {string is, too few args} {
list [catch {string is} msg] $msg
@@ -205,10 +223,10 @@ test string-6.4 {string is, too many args} {
} {1 {wrong # args: should be "string is class ?-strict? ?-failindex var? str"}}
test string-6.5 {string is, class check} {
list [catch {string is bogus str} msg] $msg
-} {1 {bad class "bogus": must be alnum, alpha, ascii, boolean, digit, double, false, integer, lower, space, true, upper, or wordchar}}
+} {1 {bad class "bogus": must be alnum, alpha, ascii, control, boolean, digit, double, false, graph, integer, lower, print, punct, space, true, upper, wordchar, or xdigit}}
test string-6.6 {string is, ambiguous class} {
list [catch {string is al str} msg] $msg
-} {1 {ambiguous class "al": must be alnum, alpha, ascii, boolean, digit, double, false, integer, lower, space, true, upper, or wordchar}}
+} {1 {ambiguous class "al": must be alnum, alpha, ascii, control, boolean, digit, double, false, graph, integer, lower, print, punct, space, true, upper, wordchar, or xdigit}}
test string-6.7 {string is alpha, all ok} {
string is alpha -strict -failindex var abc
} 1
@@ -446,39 +464,91 @@ test string-6.82 {string is wordchar, false} {
test string-6.83 {string is wordchar, unicode false} {
list [string is wordchar -fail var abc\u0080def] $var
} {0 3}
+test string-6.84 {string is control} {
+ ## Control chars are in the ranges
+ ## 00..1F && 7F..9F
+ list [string is control -fail var \x00\x01\x10\x1F\x7F\x80\x9F\x60] $var
+} {0 7}
+test string-6.85 {string is control} {
+ string is control \u0100
+} 0
+test string-6.86 {string is graph} {
+ ## graph is any print char, except space
+ list [string is gra -fail var "0123abc!@#\$\u0100 "] $var
+} {0 12}
+test string-6.87 {string is print} {
+ ## basically any printable char
+ list [string is print -fail var "0123abc!@#\$\u0100 \u0010"] $var
+} {0 13}
+test string-6.88 {string is punct} {
+ ## any graph char that isn't alnum
+ list [string is punct -fail var "_=!@#\$\u00beq0"] $var
+} {0 7}
+test string-6.89 {string is xdigit} {
+ list [string is xdigit -fail var 0123456789\u0061bcdefABCDEFg] $var
+} {0 22}
-test string-7.1 {string last} {
+test string-7.1 {string last, too few args} {
list [catch {string last a} msg] $msg
-} {1 {wrong # args: should be "string last string1 string2"}}
-test string-7.2 {string last} {
+} {1 {wrong # args: should be "string last string1 string2 ?startIndex?"}}
+test string-7.2 {string last, bad args} {
list [catch {string last a b c} msg] $msg
-} {1 {wrong # args: should be "string last string1 string2"}}
-test string-7.3 {string last} {
+} {1 {bad index "c": must be integer or end?-integer?}}
+test string-7.3 {string last, too many args} {
+ list [catch {string last a b c d} msg] $msg
+} {1 {wrong # args: should be "string last string1 string2 ?startIndex?"}}
+test string-7.4 {string last} {
string la xxx xxxx123xx345x678
} 1
-test string-7.4 {string last} {
+test string-7.5 {string last} {
string last xx xxxx123xx345x678
} 7
-test string-7.5 {string last} {
+test string-7.6 {string last} {
string las x xxxx123xx345x678
} 12
-test string-7.6 {string last, unicode} {
+test string-7.7 {string last, unicode} {
string las x xxxx12\u7266xx345x678
} 12
-test string-7.7 {string last, unicode} {
+test string-7.8 {string last, unicode} {
+ string las \u7266 xxxx12\u7266xx345x678
+} 6
+test string-7.9 {string last, stop index} {
+ string las \u7266 xxxx12\u7266xx345x678
+} 6
+test string-7.10 {string last, unicode} {
string las \u7266 xxxx12\u7266xx345x678
} 6
+test string-7.11 {string last, start index} {
+ string last \u7266 abc\u7266x 3
+} 3
+test string-7.12 {string last, start index} {
+ string last \u7266 abc\u7266x 2
+} -1
+test string-7.13 {string last, start index} {
+ ## Constrain to last 'a' should work
+ string last ba badbad end-1
+} 3
+test string-7.14 {string last, start index} {
+ ## Constrain to last 'b' should skip last 'ba'
+ string last ba badbad end-2
+} 0
+test string-7.15 {string last, start index} {
+ string last \334a \334ad\334ad 0
+} -1
+test string-7.16 {string last, start index} {
+ string last \334a \334ad\334ad end-1
+} 3
-test cmdMZ-8.1 {Tcl_StringObjCmd: string bytelength} {
+test string-8.1 {string bytelength} {
list [catch {string bytelength} msg] $msg
} {1 {wrong # args: should be "string bytelength string"}}
-test cmdMZ-8.2 {Tcl_StringObjCmd: string bytelength} {
+test string-8.2 {string bytelength} {
list [catch {string bytelength a b} msg] $msg
} {1 {wrong # args: should be "string bytelength string"}}
-test cmdMZ-8.3 {Tcl_StringObjCmd: string bytelength} {
+test string-8.3 {string bytelength} {
string bytelength "\u00c7"
} 2
-test cmdMZ-8.4 {Tcl_StringObjCmd: string bytelength} {
+test string-8.4 {string bytelength} {
string b ""
} 0
@@ -538,12 +608,12 @@ test string-10.13 {string map, -nocase unicode} {
string map -nocase [list \374 ue UE \334] "a\374ueUE\000EU"
} aue\334\334\0EU
-test string-11.1 {string match} {
+test string-11.1 {string match, too few args} {
list [catch {string match a} msg] $msg
-} {1 {wrong # args: should be "string match pattern string"}}
-test string-11.2 {string match} {
- list [catch {string match a b c} msg] $msg
-} {1 {wrong # args: should be "string match pattern string"}}
+} {1 {wrong # args: should be "string match ?-nocase? pattern string"}}
+test string-11.2 {string match, too many args} {
+ list [catch {string match a b c d} msg] $msg
+} {1 {wrong # args: should be "string match ?-nocase? pattern string"}}
test string-11.3 {string match} {
string match abc abc
} 1
@@ -625,6 +695,36 @@ test string-11.28 {string match} {
test string-11.29 {string match} {
string match \[a a
} 1
+test string-11.30 {string match, bad args} {
+ list [catch {string match - b c} msg] $msg
+} {1 {bad option "-": must be -nocase}}
+test string-11.31 {string match case} {
+ string match a A
+} 0
+test string-11.32 {string match nocase} {
+ string match -n a A
+} 1
+test string-11.33 {string match nocase} {
+ string match -nocase a\334 A\374
+} 1
+test string-11.34 {string match nocase} {
+ string match -nocase a*f ABCDEf
+} 1
+test string-11.35 {string match case, false hope} {
+ # This is true because '_' lies between the A-Z and a-z ranges
+ string match {[A-z]} _
+} 1
+test string-11.36 {string match nocase range} {
+ # This is false because although '_' lies between the A-Z and a-z ranges,
+ # we lower case the end points before checking the ranges.
+ string match -nocase {[A-z]} _
+} 0
+test string-11.37 {string match nocase} {
+ string match -nocase {[A-fh-Z]} g
+} 0
+test string-11.38 {string match case, reverse range} {
+ string match {[A-fh-Z]} g
+} 1
test string-12.1 {string range} {
list [catch {string range} msg] $msg
diff --git a/unix/mkLinks b/unix/mkLinks
index 0a07ade..71924e6 100644
--- a/unix/mkLinks
+++ b/unix/mkLinks
@@ -1043,6 +1043,10 @@ if test -r GetStdChan.3; then
rm -f Tcl_SetStdChannel.3
ln GetStdChan.3 Tcl_SetStdChannel.3
fi
+if test -r StrMatch.3; then
+ rm -f Tcl_StringCaseMatch.3
+ ln StrMatch.3 Tcl_StringCaseMatch.3
+fi
if test -r StringObj.3; then
rm -f Tcl_SetStringObj.3
ln StringObj.3 Tcl_SetStringObj.3