summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog13
-rw-r--r--generic/tclCmdMZ.c11
-rw-r--r--generic/tclExecute.c12
-rw-r--r--generic/tclInt.decls9
-rw-r--r--generic/tclIntDecls.h12
-rw-r--r--generic/tclStubInit.c3
-rw-r--r--generic/tclUtf.c195
-rw-r--r--tests/string.test37
-rw-r--r--tests/stringComp.test48
9 files changed, 324 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 447cbba..8a9a233 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2003-02-17 Jeff Hobbs <jeffh@ActiveState.com>
+
+ * generic/tclExecute.c (TclExecuteByteCode INST_STR_MATCH):
+ * generic/tclCmdMZ.c (Tcl_StringObjCmd STR_MATCH):
+ * generic/tclUtf.c (TclUniCharMatch):
+ * generic/tclInt.decls: add private TclUniCharMatch function that
+ * generic/tclIntDecls.h: does string match on counted unicode
+ * generic/tclStubInit.c: strings. Tcl_UniCharCaseMatch has the
+ * tests/string.test: failing that it can't handle strings or
+ * tests/stringComp.test: patterns with embedded NULLs. Added
+ tests that actually try strings/pats with NULLs. TclUniCharMatch
+ should be TIPed and made public in the next minor version rev.
+
2003-02-17 Miguel Sofer <msofer@users.sf.net>
* generic/tclBasic.c (TclRenameCommand): 'oldFullName' object was
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index d3deaae..2339965 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -14,7 +14,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.80 2003/01/17 14:19:44 vincentdarley Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.81 2003/02/18 02:25:43 hobbs Exp $
*/
#include "tclInt.h"
@@ -2008,6 +2008,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
break;
}
case STR_MATCH: {
+ Tcl_UniChar *ustring1, *ustring2;
int nocase = 0;
if (objc < 4 || objc > 5) {
@@ -2027,10 +2028,10 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
return TCL_ERROR;
}
}
-
- Tcl_SetBooleanObj(resultPtr,
- Tcl_UniCharCaseMatch(Tcl_GetUnicode(objv[objc-1]),
- Tcl_GetUnicode(objv[objc-2]), nocase));
+ ustring1 = Tcl_GetUnicodeFromObj(objv[objc-1], &length1);
+ ustring2 = Tcl_GetUnicodeFromObj(objv[objc-2], &length2);
+ Tcl_SetBooleanObj(resultPtr, TclUniCharMatch(ustring1, length1,
+ ustring2, length2, nocase));
break;
}
case STR_RANGE: {
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 93bf3a9..3333959 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclExecute.c,v 1.92 2003/02/06 22:44:57 mdejong Exp $
+ * RCS: @(#) $Id: tclExecute.c,v 1.93 2003/02/18 02:25:44 hobbs Exp $
*/
#include "tclInt.h"
@@ -2650,10 +2650,16 @@ TclExecuteByteCode(interp, codePtr)
* Check that at least one of the objects is Unicode before
* promoting both.
*/
+
if ((valuePtr->typePtr == &tclStringType)
|| (value2Ptr->typePtr == &tclStringType)) {
- match = Tcl_UniCharCaseMatch(Tcl_GetUnicode(valuePtr),
- Tcl_GetUnicode(value2Ptr), nocase);
+ Tcl_UniChar *ustring1, *ustring2;
+ int length1, length2;
+
+ ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length1);
+ ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2);
+ match = TclUniCharMatch(ustring1, length1, ustring2, length2,
+ nocase);
} else {
match = Tcl_StringCaseMatch(TclGetString(valuePtr),
TclGetString(value2Ptr), nocase);
diff --git a/generic/tclInt.decls b/generic/tclInt.decls
index f2b2f99..8a68f70 100644
--- a/generic/tclInt.decls
+++ b/generic/tclInt.decls
@@ -12,7 +12,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: tclInt.decls,v 1.58 2002/12/06 23:22:59 hobbs Exp $
+# RCS: @(#) $Id: tclInt.decls,v 1.59 2003/02/18 02:25:45 hobbs Exp $
library tcl
@@ -690,6 +690,13 @@ declare 172 generic {
int TclInThreadExit(void)
}
+# added for 8.4.2
+
+declare 173 generic {
+ int TclUniCharMatch (CONST Tcl_UniChar *string, int strLen, \
+ CONST Tcl_UniChar *pattern, int ptnLen, int nocase)
+}
+
##############################################################################
# Define the platform specific internal Tcl interface. These functions are
diff --git a/generic/tclIntDecls.h b/generic/tclIntDecls.h
index 5c23144..e41dca6 100644
--- a/generic/tclIntDecls.h
+++ b/generic/tclIntDecls.h
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclIntDecls.h,v 1.48 2002/11/07 02:13:36 mdejong Exp $
+ * RCS: @(#) $Id: tclIntDecls.h,v 1.49 2003/02/18 02:25:45 hobbs Exp $
*/
#ifndef _TCLINTDECLS
@@ -498,6 +498,11 @@ EXTERN int TclCheckExecutionTraces _ANSI_ARGS_((
Tcl_Obj *CONST objv[]));
/* 172 */
EXTERN int TclInThreadExit _ANSI_ARGS_((void));
+/* 173 */
+EXTERN int TclUniCharMatch _ANSI_ARGS_((
+ CONST Tcl_UniChar * string, int strLen,
+ CONST Tcl_UniChar * pattern, int ptnLen,
+ int nocase));
typedef struct TclIntStubs {
int magic;
@@ -700,6 +705,7 @@ typedef struct TclIntStubs {
int (*tclCheckInterpTraces) _ANSI_ARGS_((Tcl_Interp * interp, CONST char * command, int numChars, Command * cmdPtr, int result, int traceFlags, int objc, Tcl_Obj *CONST objv[])); /* 170 */
int (*tclCheckExecutionTraces) _ANSI_ARGS_((Tcl_Interp * interp, CONST char * command, int numChars, Command * cmdPtr, int result, int traceFlags, int objc, Tcl_Obj *CONST objv[])); /* 171 */
int (*tclInThreadExit) _ANSI_ARGS_((void)); /* 172 */
+ int (*tclUniCharMatch) _ANSI_ARGS_((CONST Tcl_UniChar * string, int strLen, CONST Tcl_UniChar * pattern, int ptnLen, int nocase)); /* 173 */
} TclIntStubs;
#ifdef __cplusplus
@@ -1306,6 +1312,10 @@ extern TclIntStubs *tclIntStubsPtr;
#define TclInThreadExit \
(tclIntStubsPtr->tclInThreadExit) /* 172 */
#endif
+#ifndef TclUniCharMatch
+#define TclUniCharMatch \
+ (tclIntStubsPtr->tclUniCharMatch) /* 173 */
+#endif
#endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index c25f053..f303e0a 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStubInit.c,v 1.78 2002/12/06 23:22:59 hobbs Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.79 2003/02/18 02:25:45 hobbs Exp $
*/
#include "tclInt.h"
@@ -244,6 +244,7 @@ TclIntStubs tclIntStubs = {
TclCheckInterpTraces, /* 170 */
TclCheckExecutionTraces, /* 171 */
TclInThreadExit, /* 172 */
+ TclUniCharMatch, /* 173 */
};
TclIntPlatStubs tclIntPlatStubs = {
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 72a23ca..6c6835c 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtf.c,v 1.29 2002/11/12 02:26:29 hobbs Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.30 2003/02/18 02:25:45 hobbs Exp $
*/
#include "tclInt.h"
@@ -1584,7 +1584,10 @@ Tcl_UniCharIsWordChar(ch)
*
* See if a particular Unicode string matches a particular pattern.
* Allows case insensitivity. This is the Unicode equivalent of
- * the char* Tcl_StringCaseMatch.
+ * the char* Tcl_StringCaseMatch. The UniChar strings must be
+ * NULL-terminated. This has no provision for counted UniChar
+ * strings, thus should not be used where NULLs are expected in the
+ * UniChar string. Use TclUniCharMatch where possible.
*
* Results:
* The return value is 1 if string matches pattern, and
@@ -1755,3 +1758,191 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)
pattern++;
}
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclUniCharMatch --
+ *
+ * See if a particular Unicode string matches a particular pattern.
+ * Allows case insensitivity. This is the Unicode equivalent of the
+ * char* Tcl_StringCaseMatch. This variant of Tcl_UniCharCaseMatch
+ * uses counted Strings, so embedded NULLs are allowed.
+ *
+ * Results:
+ * The return value is 1 if string matches pattern, and
+ * 0 otherwise. The matching operation permits the following
+ * special characters in the pattern: *?\[] (see the manual
+ * entry for details on what these mean).
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
+ CONST Tcl_UniChar *string; /* Unicode String. */
+ int strLen; /* length of String */
+ CONST Tcl_UniChar *pattern; /* Pattern, which may contain special
+ * characters. */
+ int ptnLen; /* length of Pattern */
+ int nocase; /* 0 for case sensitive, 1 for insensitive */
+{
+ CONST Tcl_UniChar *stringEnd, *patternEnd;
+ Tcl_UniChar p;
+
+ stringEnd = string + strLen;
+ patternEnd = pattern + ptnLen;
+
+ while (1) {
+ /*
+ * See if we're at the end of both the pattern and the string. If
+ * so, we succeeded. If we're at the end of the pattern but not at
+ * the end of the string, we failed.
+ */
+
+ if (pattern == patternEnd) {
+ return (string == stringEnd);
+ }
+ p = *pattern;
+ if ((string == stringEnd) && (p != '*')) {
+ return 0;
+ }
+
+ /*
+ * Check for a "*" as the next pattern character. It matches any
+ * substring. We handle this by skipping all the characters up to the
+ * next matching one in the pattern, and then calling ourselves
+ * recursively for each postfix of string, until either we match or we
+ * reach the end of the string.
+ */
+
+ if (p == '*') {
+ /*
+ * Skip all successive *'s in the pattern
+ */
+ while (*(++pattern) == '*') {}
+ if (pattern == patternEnd) {
+ return 1;
+ }
+ p = *pattern;
+ if (nocase) {
+ p = Tcl_UniCharToLower(p);
+ }
+ while (1) {
+ /*
+ * Optimization for matching - cruise through the string
+ * quickly if the next char in the pattern isn't a special
+ * character
+ */
+ if ((p != '[') && (p != '?') && (p != '\\')) {
+ if (nocase) {
+ while ((string < stringEnd) && (p != *string)
+ && (p != Tcl_UniCharToLower(*string))) {
+ string++;
+ }
+ } else {
+ while ((string < stringEnd) && (p != *string)) {
+ string++;
+ }
+ }
+ }
+ if (TclUniCharMatch(string, stringEnd - string,
+ pattern, patternEnd - pattern, nocase)) {
+ return 1;
+ }
+ if (string == stringEnd) {
+ return 0;
+ }
+ string++;
+ }
+ }
+
+ /*
+ * Check for a "?" as the next pattern character. It matches
+ * any single character.
+ */
+
+ if (p == '?') {
+ pattern++;
+ string++;
+ continue;
+ }
+
+ /*
+ * Check for a "[" as the next pattern character. It is followed
+ * by a list of characters that are acceptable, or by a range
+ * (two characters separated by "-").
+ */
+
+ if (p == '[') {
+ Tcl_UniChar ch1, startChar, endChar;
+
+ pattern++;
+ ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
+ string++;
+ while (1) {
+ if ((*pattern == ']') || (pattern == patternEnd)) {
+ return 0;
+ }
+ startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
+ pattern++;
+ if (*pattern == '-') {
+ pattern++;
+ if (pattern == patternEnd) {
+ return 0;
+ }
+ endChar = (nocase ? Tcl_UniCharToLower(*pattern)
+ : *pattern);
+ pattern++;
+ if (((startChar <= ch1) && (ch1 <= endChar))
+ || ((endChar <= ch1) && (ch1 <= startChar))) {
+ /*
+ * Matches ranges of form [a-z] or [z-a].
+ */
+ break;
+ }
+ } else if (startChar == ch1) {
+ break;
+ }
+ }
+ while (*pattern != ']') {
+ if (pattern == patternEnd) {
+ pattern--;
+ break;
+ }
+ pattern++;
+ }
+ pattern++;
+ continue;
+ }
+
+ /*
+ * If the next pattern character is '\', just strip off the '\'
+ * so we do exact matching on the character that follows.
+ */
+
+ if (p == '\\') {
+ if (++pattern == patternEnd) {
+ return 0;
+ }
+ }
+
+ /*
+ * There's no special character. Just make sure that the next
+ * bytes of each string match.
+ */
+
+ if (nocase) {
+ if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {
+ return 0;
+ }
+ } else if (*string != *pattern) {
+ return 0;
+ }
+ string++;
+ pattern++;
+ }
+}
diff --git a/tests/string.test b/tests/string.test
index bffb623..ae84010 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -12,7 +12,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: string.test,v 1.35 2002/11/19 02:34:50 hobbs Exp $
+# RCS: @(#) $Id: string.test,v 1.36 2003/02/18 02:25:45 hobbs Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -895,7 +895,40 @@ test string-11.51 {string match; *, -nocase and UTF-8} {
string match -nocase [binary format I 717316707] \
[binary format I 2028036707]
} 1
-
+test string-11.52 {string match, null char in string} {
+ set out ""
+ set ptn "*abc*"
+ foreach elem [list "\u0000@abc" "@abc" "\u0000@abc\u0000" "blahabcblah"] {
+ lappend out [string match $ptn $elem]
+ }
+ set out
+} {1 1 1 1}
+test string-11.53 {string match, null char in pattern} {
+ set out ""
+ foreach {ptn elem} [list \
+ "*\u0000abc\u0000" "\u0000abc\u0000" \
+ "*\u0000abc\u0000" "\u0000abc\u0000ef" \
+ "*\u0000abc\u0000*" "\u0000abc\u0000ef" \
+ "*\u0000abc\u0000" "@\u0000abc\u0000ef" \
+ "*\u0000abc\u0000*" "@\u0000abc\u0000ef" \
+ ] {
+ lappend out [string match $ptn $elem]
+ }
+ set out
+} {1 0 1 0 1}
+test string-11.54 {string match, failure} {
+ set longString ""
+ for {set i 0} {$i < 10} {incr i} {
+ append longString "abcdefghijklmnopqrstuvwxy\u0000z01234567890123"
+ }
+ string first $longString 123
+ list [string match *cba* $longString] \
+ [string match *a*l*\u0000* $longString] \
+ [string match *a*l*\u0000*123 $longString] \
+ [string match *a*l*\u0000*123* $longString] \
+ [string match *a*l*\u0000*cba* $longString] \
+ [string match *===* $longString]
+} {0 1 1 1 0 0}
test string-12.1 {string range} {
list [catch {string range} msg] $msg
diff --git a/tests/stringComp.test b/tests/stringComp.test
index 20779e4..14b0107 100644
--- a/tests/stringComp.test
+++ b/tests/stringComp.test
@@ -15,7 +15,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: stringComp.test,v 1.5 2002/05/29 09:09:00 hobbs Exp $
+# RCS: @(#) $Id: stringComp.test,v 1.6 2003/02/18 02:25:45 hobbs Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -643,6 +643,52 @@ test string-11.50 {string match, *special case} {
proc foo {} {string match "\\" "\\"}
foo
} 0
+test string-11.51 {string match; *, -nocase and UTF-8} {
+ proc foo {} {string match -nocase [binary format I 717316707] \
+ [binary format I 2028036707]}
+ foo
+} 1
+test string-11.52 {string match, null char in string} {
+ proc foo {} {
+ set ptn "*abc*"
+ foreach elem [list "\u0000@abc" "@abc" "\u0000@abc\u0000" "blahabcblah"] {
+ lappend out [string match $ptn $elem]
+ }
+ set out
+ }
+ foo
+} {1 1 1 1}
+test string-11.53 {string match, null char in pattern} {
+ proc foo {} {
+ set out ""
+ foreach {ptn elem} [list \
+ "*\u0000abc\u0000" "\u0000abc\u0000" \
+ "*\u0000abc\u0000" "\u0000abc\u0000ef" \
+ "*\u0000abc\u0000*" "\u0000abc\u0000ef" \
+ "*\u0000abc\u0000" "@\u0000abc\u0000ef" \
+ "*\u0000abc\u0000*" "@\u0000abc\u0000ef" \
+ ] {
+ lappend out [string match $ptn $elem]
+ }
+ set out
+ }
+ foo
+} {1 0 1 0 1}
+test string-11.54 {string match, failure} {
+ proc foo {} {
+ set longString ""
+ for {set i 0} {$i < 10} {incr i} {
+ append longString "abcdefghijklmnopqrstuvwxy\u0000z01234567890123"
+ }
+ list [string match *cba* $longString] \
+ [string match *a*l*\u0000* $longString] \
+ [string match *a*l*\u0000*123 $longString] \
+ [string match *a*l*\u0000*123* $longString] \
+ [string match *a*l*\u0000*cba* $longString] \
+ [string match *===* $longString]
+ }
+ foo
+} {0 1 1 1 0 0}
## string range
## not yet bc