summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2002-01-02 13:52:03 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2002-01-02 13:52:03 (GMT)
commit75c6074fa9ed66a5fe5ac9afbd633cf0207e0899 (patch)
treea705d6a0587c54ed5536496f94e042dd0c53b62c
parenteb8476367a6bc605f4c88e534db5caefc769f216 (diff)
downloadtcl-75c6074fa9ed66a5fe5ac9afbd633cf0207e0899.zip
tcl-75c6074fa9ed66a5fe5ac9afbd633cf0207e0899.tar.gz
tcl-75c6074fa9ed66a5fe5ac9afbd633cf0207e0899.tar.bz2
Fixed fault with case-insensitive string matching (Bug#233257) and rewrote
some tests to test what they claimed to be testing.
-rw-r--r--ChangeLog12
-rw-r--r--generic/tclUtf.c5
-rw-r--r--generic/tclUtil.c27
-rw-r--r--tests/string.test6
-rw-r--r--tests/util.test108
5 files changed, 97 insertions, 61 deletions
diff --git a/ChangeLog b/ChangeLog
index b6971fa..34a3b74 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2002-01-02 Donal K. Fellows <fellowsd@cs.man.ac.uk>
+
+ * tests/util.test (Wrapper_Tcl_StringMatch,util-5.*): Rewrote so
+ the test is performed with the right internal function since
+ [string match] no longer uses Tcl_StringCaseMatch internally.
+
+ * tests/string.test (string-11.51):
+ * generic/tclUtf.c (Tcl_UniCharCaseMatch):
+ * generic/tclUtil.c (Tcl_StringCaseMatch): Fault with matching
+ case-insensitive non-ASCII patterns containing upper case
+ characters. [Bug #233257]
+
2001-12-28 Jeff Hobbs <jeffh@ActiveState.com>
* library/init.tcl: make sure env(COMSPEC) on Windows is executed
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 17990db..5bdf557 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtf.c,v 1.19 2001/10/16 05:31:19 dgp Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.20 2002/01/02 13:52:04 dkf Exp $
*/
#include "tclInt.h"
@@ -1691,6 +1691,9 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)
if (p == 0) {
return 1;
}
+ if (nocase) {
+ p = Tcl_UniCharToLower(p);
+ }
while (1) {
/*
* Optimization for matching - cruise through the string
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 11134be..52a5566 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtil.c,v 1.26 2001/11/21 02:36:21 hobbs Exp $
+ * RCS: @(#) $Id: tclUtil.c,v 1.27 2002/01/02 13:52:04 dkf Exp $
*/
#include "tclInt.h"
@@ -1227,6 +1227,10 @@ Tcl_StringCaseMatch(string, pattern, nocase)
if (p == '\0') {
return 1;
}
+ Tcl_UtfToUniChar(pattern, &ch2);
+ if (nocase) {
+ ch2 = Tcl_UniCharToLower(ch2);
+ }
while (1) {
/*
* Optimization for matching - cruise through the string
@@ -1235,16 +1239,25 @@ Tcl_StringCaseMatch(string, pattern, nocase)
*/
if ((p != '[') && (p != '?') && (p != '\\')) {
if (nocase) {
- while (*string && (p != *string)) {
- ch2 = Tcl_UtfToUniChar(string, &ch1);
- if (p == Tcl_UniCharToLower(ch1)) {
+ while (*string) {
+ int charLen = Tcl_UtfToUniChar(string, &ch1);
+ if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
break;
}
- string += ch2;
+ string += charLen;
}
} else {
- while (*string && (p != *string)) {
- string += Tcl_UtfToUniChar(string, &ch1);
+ /*
+ * There's no point in trying to make this code
+ * shorter, as the number of bytes you want to
+ * compare each time is non-constant.
+ */
+ while (*string) {
+ int charLen = Tcl_UtfToUniChar(string, &ch1);
+ if (ch2 == ch1) {
+ break;
+ }
+ string += charLen;
}
}
}
diff --git a/tests/string.test b/tests/string.test
index 15b88d7..2ab91b2 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -12,7 +12,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: string.test,v 1.30 2001/11/14 23:16:36 hobbs Exp $
+# RCS: @(#) $Id: string.test,v 1.31 2002/01/02 13:52:04 dkf Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -855,6 +855,10 @@ test string-11.49 {string match, *special case} {
test string-11.50 {string match, *special case} {
string match "\\" "\\"
} 0
+test string-11.51 {string match; *, -nocase and UTF-8} {
+ string match -nocase [binary format I 717316707] \
+ [binary format I 2028036707]
+} 1
test string-12.1 {string range} {
diff --git a/tests/util.test b/tests/util.test
index d39a6da..fe94732 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -7,7 +7,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: util.test,v 1.9 2001/09/19 08:52:46 dkf Exp $
+# RCS: @(#) $Id: util.test,v 1.10 2002/01/02 13:52:04 dkf Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -67,180 +67,184 @@ test util-4.6 {Tcl_ConcatObj - utf-8 sequence with "whitespace" char} {
concat \xe0
} \xe0
+proc Wrapper_Tcl_StringMatch {pattern string} {
+ # Forces use of Tcl_StringMatch, not Tcl_UniCharCaseMatch
+ switch -glob -- $string $pattern {return 1} default {return 0}
+}
test util-5.1 {Tcl_StringMatch} {
- string match ab*c abc
+ Wrapper_Tcl_StringMatch ab*c abc
} 1
test util-5.2 {Tcl_StringMatch} {
- string match ab**c abc
+ Wrapper_Tcl_StringMatch ab**c abc
} 1
test util-5.3 {Tcl_StringMatch} {
- string match ab* abcdef
+ Wrapper_Tcl_StringMatch ab* abcdef
} 1
test util-5.4 {Tcl_StringMatch} {
- string match *c abc
+ Wrapper_Tcl_StringMatch *c abc
} 1
test util-5.5 {Tcl_StringMatch} {
- string match *3*6*9 0123456789
+ Wrapper_Tcl_StringMatch *3*6*9 0123456789
} 1
test util-5.6 {Tcl_StringMatch} {
- string match *3*6*9 01234567890
+ Wrapper_Tcl_StringMatch *3*6*9 01234567890
} 0
test util-5.7 {Tcl_StringMatch: UTF-8} {
- string match *u \u4e4fu
+ Wrapper_Tcl_StringMatch *u \u4e4fu
} 1
test util-5.8 {Tcl_StringMatch} {
- string match a?c abc
+ Wrapper_Tcl_StringMatch a?c abc
} 1
test util-5.9 {Tcl_StringMatch: UTF-8} {
# skip one character in string
- string match a?c a\u4e4fc
+ Wrapper_Tcl_StringMatch a?c a\u4e4fc
} 1
test util-5.10 {Tcl_StringMatch} {
- string match a??c abc
+ Wrapper_Tcl_StringMatch a??c abc
} 0
test util-5.11 {Tcl_StringMatch} {
- string match ?1??4???8? 0123456789
+ Wrapper_Tcl_StringMatch ?1??4???8? 0123456789
} 1
test util-5.12 {Tcl_StringMatch} {
- string match {[abc]bc} abc
+ Wrapper_Tcl_StringMatch {[abc]bc} abc
} 1
test util-5.13 {Tcl_StringMatch: UTF-8} {
# string += Tcl_UtfToUniChar(string, &ch);
- string match "\[\u4e4fxy\]bc" "\u4e4fbc"
+ Wrapper_Tcl_StringMatch "\[\u4e4fxy\]bc" "\u4e4fbc"
} 1
test util-5.14 {Tcl_StringMatch} {
# if ((*pattern == ']') || (*pattern == '\0'))
# badly formed pattern
- string match {[]} {[]}
+ Wrapper_Tcl_StringMatch {[]} {[]}
} 0
test util-5.15 {Tcl_StringMatch} {
# if ((*pattern == ']') || (*pattern == '\0'))
# badly formed pattern
- string match {[} {[}
+ Wrapper_Tcl_StringMatch {[} {[}
} 0
test util-5.16 {Tcl_StringMatch} {
- string match {a[abc]c} abc
+ Wrapper_Tcl_StringMatch {a[abc]c} abc
} 1
test util-5.17 {Tcl_StringMatch: UTF-8} {
# pattern += Tcl_UtfToUniChar(pattern, &endChar);
# get 1 UTF-8 character
- string match "a\[a\u4e4fc]c" "a\u4e4fc"
+ Wrapper_Tcl_StringMatch "a\[a\u4e4fc]c" "a\u4e4fc"
} 1
test util-5.18 {Tcl_StringMatch: UTF-8} {
# pattern += Tcl_UtfToUniChar(pattern, &endChar);
# proper advance: wrong answer would match on UTF trail byte of \u4e4f
- string match {a[a\u4e4fc]c} [bytestring a\u008fc]
+ Wrapper_Tcl_StringMatch {a[a\u4e4fc]c} [bytestring a\u008fc]
} 0
test util-5.19 {Tcl_StringMatch: UTF-8} {
# pattern += Tcl_UtfToUniChar(pattern, &endChar);
# proper advance.
- string match {a[a\u4e4fc]c} "acc"
+ Wrapper_Tcl_StringMatch {a[a\u4e4fc]c} "acc"
} 1
test util-5.20 {Tcl_StringMatch} {
- string match {a[xyz]c} abc
+ Wrapper_Tcl_StringMatch {a[xyz]c} abc
} 0
test util-5.21 {Tcl_StringMatch} {
- string match {12[2-7]45} 12345
+ Wrapper_Tcl_StringMatch {12[2-7]45} 12345
} 1
test util-5.22 {Tcl_StringMatch: UTF-8 range} {
- string match "\[\u4e00-\u4e4f]" "0"
+ Wrapper_Tcl_StringMatch "\[\u4e00-\u4e4f]" "0"
} 0
test util-5.23 {Tcl_StringMatch: UTF-8 range} {
- string match "\[\u4e00-\u4e4f]" "\u4e33"
+ Wrapper_Tcl_StringMatch "\[\u4e00-\u4e4f]" "\u4e33"
} 1
test util-5.24 {Tcl_StringMatch: UTF-8 range} {
- string match "\[\u4e00-\u4e4f]" "\uff08"
+ Wrapper_Tcl_StringMatch "\[\u4e00-\u4e4f]" "\uff08"
} 0
test util-5.25 {Tcl_StringMatch} {
- string match {12[ab2-4cd]45} 12345
+ Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12345
} 1
test util-5.26 {Tcl_StringMatch} {
- string match {12[ab2-4cd]45} 12b45
+ Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12b45
} 1
test util-5.27 {Tcl_StringMatch} {
- string match {12[ab2-4cd]45} 12d45
+ Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12d45
} 1
test util-5.28 {Tcl_StringMatch} {
- string match {12[ab2-4cd]45} 12145
+ Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12145
} 0
test util-5.29 {Tcl_StringMatch} {
- string match {12[ab2-4cd]45} 12545
+ Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12545
} 0
test util-5.30 {Tcl_StringMatch: forwards range} {
- string match {[k-w]} "z"
+ Wrapper_Tcl_StringMatch {[k-w]} "z"
} 0
test util-5.31 {Tcl_StringMatch: forwards range} {
- string match {[k-w]} "w"
+ Wrapper_Tcl_StringMatch {[k-w]} "w"
} 1
test util-5.32 {Tcl_StringMatch: forwards range} {
- string match {[k-w]} "r"
+ Wrapper_Tcl_StringMatch {[k-w]} "r"
} 1
test util-5.33 {Tcl_StringMatch: forwards range} {
- string match {[k-w]} "k"
+ Wrapper_Tcl_StringMatch {[k-w]} "k"
} 1
test util-5.34 {Tcl_StringMatch: forwards range} {
- string match {[k-w]} "a"
+ Wrapper_Tcl_StringMatch {[k-w]} "a"
} 0
test util-5.35 {Tcl_StringMatch: reverse range} {
- string match {[w-k]} "z"
+ Wrapper_Tcl_StringMatch {[w-k]} "z"
} 0
test util-5.36 {Tcl_StringMatch: reverse range} {
- string match {[w-k]} "w"
+ Wrapper_Tcl_StringMatch {[w-k]} "w"
} 1
test util-5.37 {Tcl_StringMatch: reverse range} {
- string match {[w-k]} "r"
+ Wrapper_Tcl_StringMatch {[w-k]} "r"
} 1
test util-5.38 {Tcl_StringMatch: reverse range} {
- string match {[w-k]} "k"
+ Wrapper_Tcl_StringMatch {[w-k]} "k"
} 1
test util-5.39 {Tcl_StringMatch: reverse range} {
- string match {[w-k]} "a"
+ Wrapper_Tcl_StringMatch {[w-k]} "a"
} 0
test util-5.40 {Tcl_StringMatch: skip correct number of ']'} {
- string match {[A-]x} Ax
+ Wrapper_Tcl_StringMatch {[A-]x} Ax
} 0
test util-5.41 {Tcl_StringMatch: skip correct number of ']'} {
- string match {[A-]]x} Ax
+ Wrapper_Tcl_StringMatch {[A-]]x} Ax
} 1
test util-5.42 {Tcl_StringMatch: skip correct number of ']'} {
- string match {[A-]]x} \ue1x
+ Wrapper_Tcl_StringMatch {[A-]]x} \ue1x
} 0
test util-5.43 {Tcl_StringMatch: skip correct number of ']'} {
- string match \[A-]\ue1]x \ue1x
+ Wrapper_Tcl_StringMatch \[A-]\ue1]x \ue1x
} 1
test util-5.44 {Tcl_StringMatch: skip correct number of ']'} {
- string match {[A-]h]x} hx
+ Wrapper_Tcl_StringMatch {[A-]h]x} hx
} 1
test util-5.45 {Tcl_StringMatch} {
# if (*pattern == '\0')
# badly formed pattern, still treats as a set
- string match {[a} a
+ Wrapper_Tcl_StringMatch {[a} a
} 1
test util-5.46 {Tcl_StringMatch} {
- string match {a\*b} a*b
+ Wrapper_Tcl_StringMatch {a\*b} a*b
} 1
test util-5.47 {Tcl_StringMatch} {
- string match {a\*b} ab
+ Wrapper_Tcl_StringMatch {a\*b} ab
} 0
test util-5.48 {Tcl_StringMatch} {
- string match {a\*\?\[\]\\\x} "a*?\[\]\\x"
+ Wrapper_Tcl_StringMatch {a\*\?\[\]\\\x} "a*?\[\]\\x"
} 1
test util-5.49 {Tcl_StringMatch} {
- string match ** ""
+ Wrapper_Tcl_StringMatch ** ""
} 1
test util-5.50 {Tcl_StringMatch} {
- string match *. ""
+ Wrapper_Tcl_StringMatch *. ""
} 0
test util-5.51 {Tcl_StringMatch} {
- string match "" ""
+ Wrapper_Tcl_StringMatch "" ""
} 1
test util-6.1 {Tcl_PrintDouble - using tcl_precision} {