From 75c6074fa9ed66a5fe5ac9afbd633cf0207e0899 Mon Sep 17 00:00:00 2001 From: dkf Date: Wed, 2 Jan 2002 13:52:03 +0000 Subject: Fixed fault with case-insensitive string matching (Bug#233257) and rewrote some tests to test what they claimed to be testing. --- ChangeLog | 12 ++++++ generic/tclUtf.c | 5 ++- generic/tclUtil.c | 27 ++++++++++---- tests/string.test | 6 ++- tests/util.test | 108 ++++++++++++++++++++++++++++-------------------------- 5 files changed, 97 insertions(+), 61 deletions(-) diff --git a/ChangeLog b/ChangeLog index b6971fa..34a3b74 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2002-01-02 Donal K. Fellows + + * tests/util.test (Wrapper_Tcl_StringMatch,util-5.*): Rewrote so + the test is performed with the right internal function since + [string match] no longer uses Tcl_StringCaseMatch internally. + + * tests/string.test (string-11.51): + * generic/tclUtf.c (Tcl_UniCharCaseMatch): + * generic/tclUtil.c (Tcl_StringCaseMatch): Fault with matching + case-insensitive non-ASCII patterns containing upper case + characters. [Bug #233257] + 2001-12-28 Jeff Hobbs * library/init.tcl: make sure env(COMSPEC) on Windows is executed diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 17990db..5bdf557 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.19 2001/10/16 05:31:19 dgp Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.20 2002/01/02 13:52:04 dkf Exp $ */ #include "tclInt.h" @@ -1691,6 +1691,9 @@ Tcl_UniCharCaseMatch(string, pattern, nocase) if (p == 0) { return 1; } + if (nocase) { + p = Tcl_UniCharToLower(p); + } while (1) { /* * Optimization for matching - cruise through the string diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 11134be..52a5566 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtil.c,v 1.26 2001/11/21 02:36:21 hobbs Exp $ + * RCS: @(#) $Id: tclUtil.c,v 1.27 2002/01/02 13:52:04 dkf Exp $ */ #include "tclInt.h" @@ -1227,6 +1227,10 @@ Tcl_StringCaseMatch(string, pattern, nocase) if (p == '\0') { return 1; } + Tcl_UtfToUniChar(pattern, &ch2); + if (nocase) { + ch2 = Tcl_UniCharToLower(ch2); + } while (1) { /* * Optimization for matching - cruise through the string @@ -1235,16 +1239,25 @@ Tcl_StringCaseMatch(string, pattern, nocase) */ if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { - while (*string && (p != *string)) { - ch2 = Tcl_UtfToUniChar(string, &ch1); - if (p == Tcl_UniCharToLower(ch1)) { + while (*string) { + int charLen = Tcl_UtfToUniChar(string, &ch1); + if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) { break; } - string += ch2; + string += charLen; } } else { - while (*string && (p != *string)) { - string += Tcl_UtfToUniChar(string, &ch1); + /* + * There's no point in trying to make this code + * shorter, as the number of bytes you want to + * compare each time is non-constant. + */ + while (*string) { + int charLen = Tcl_UtfToUniChar(string, &ch1); + if (ch2 == ch1) { + break; + } + string += charLen; } } } diff --git a/tests/string.test b/tests/string.test index 15b88d7..2ab91b2 100644 --- a/tests/string.test +++ b/tests/string.test @@ -12,7 +12,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: string.test,v 1.30 2001/11/14 23:16:36 hobbs Exp $ +# RCS: @(#) $Id: string.test,v 1.31 2002/01/02 13:52:04 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -855,6 +855,10 @@ test string-11.49 {string match, *special case} { test string-11.50 {string match, *special case} { string match "\\" "\\" } 0 +test string-11.51 {string match; *, -nocase and UTF-8} { + string match -nocase [binary format I 717316707] \ + [binary format I 2028036707] +} 1 test string-12.1 {string range} { diff --git a/tests/util.test b/tests/util.test index d39a6da..fe94732 100644 --- a/tests/util.test +++ b/tests/util.test @@ -7,7 +7,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: util.test,v 1.9 2001/09/19 08:52:46 dkf Exp $ +# RCS: @(#) $Id: util.test,v 1.10 2002/01/02 13:52:04 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -67,180 +67,184 @@ test util-4.6 {Tcl_ConcatObj - utf-8 sequence with "whitespace" char} { concat \xe0 } \xe0 +proc Wrapper_Tcl_StringMatch {pattern string} { + # Forces use of Tcl_StringMatch, not Tcl_UniCharCaseMatch + switch -glob -- $string $pattern {return 1} default {return 0} +} test util-5.1 {Tcl_StringMatch} { - string match ab*c abc + Wrapper_Tcl_StringMatch ab*c abc } 1 test util-5.2 {Tcl_StringMatch} { - string match ab**c abc + Wrapper_Tcl_StringMatch ab**c abc } 1 test util-5.3 {Tcl_StringMatch} { - string match ab* abcdef + Wrapper_Tcl_StringMatch ab* abcdef } 1 test util-5.4 {Tcl_StringMatch} { - string match *c abc + Wrapper_Tcl_StringMatch *c abc } 1 test util-5.5 {Tcl_StringMatch} { - string match *3*6*9 0123456789 + Wrapper_Tcl_StringMatch *3*6*9 0123456789 } 1 test util-5.6 {Tcl_StringMatch} { - string match *3*6*9 01234567890 + Wrapper_Tcl_StringMatch *3*6*9 01234567890 } 0 test util-5.7 {Tcl_StringMatch: UTF-8} { - string match *u \u4e4fu + Wrapper_Tcl_StringMatch *u \u4e4fu } 1 test util-5.8 {Tcl_StringMatch} { - string match a?c abc + Wrapper_Tcl_StringMatch a?c abc } 1 test util-5.9 {Tcl_StringMatch: UTF-8} { # skip one character in string - string match a?c a\u4e4fc + Wrapper_Tcl_StringMatch a?c a\u4e4fc } 1 test util-5.10 {Tcl_StringMatch} { - string match a??c abc + Wrapper_Tcl_StringMatch a??c abc } 0 test util-5.11 {Tcl_StringMatch} { - string match ?1??4???8? 0123456789 + Wrapper_Tcl_StringMatch ?1??4???8? 0123456789 } 1 test util-5.12 {Tcl_StringMatch} { - string match {[abc]bc} abc + Wrapper_Tcl_StringMatch {[abc]bc} abc } 1 test util-5.13 {Tcl_StringMatch: UTF-8} { # string += Tcl_UtfToUniChar(string, &ch); - string match "\[\u4e4fxy\]bc" "\u4e4fbc" + Wrapper_Tcl_StringMatch "\[\u4e4fxy\]bc" "\u4e4fbc" } 1 test util-5.14 {Tcl_StringMatch} { # if ((*pattern == ']') || (*pattern == '\0')) # badly formed pattern - string match {[]} {[]} + Wrapper_Tcl_StringMatch {[]} {[]} } 0 test util-5.15 {Tcl_StringMatch} { # if ((*pattern == ']') || (*pattern == '\0')) # badly formed pattern - string match {[} {[} + Wrapper_Tcl_StringMatch {[} {[} } 0 test util-5.16 {Tcl_StringMatch} { - string match {a[abc]c} abc + Wrapper_Tcl_StringMatch {a[abc]c} abc } 1 test util-5.17 {Tcl_StringMatch: UTF-8} { # pattern += Tcl_UtfToUniChar(pattern, &endChar); # get 1 UTF-8 character - string match "a\[a\u4e4fc]c" "a\u4e4fc" + Wrapper_Tcl_StringMatch "a\[a\u4e4fc]c" "a\u4e4fc" } 1 test util-5.18 {Tcl_StringMatch: UTF-8} { # pattern += Tcl_UtfToUniChar(pattern, &endChar); # proper advance: wrong answer would match on UTF trail byte of \u4e4f - string match {a[a\u4e4fc]c} [bytestring a\u008fc] + Wrapper_Tcl_StringMatch {a[a\u4e4fc]c} [bytestring a\u008fc] } 0 test util-5.19 {Tcl_StringMatch: UTF-8} { # pattern += Tcl_UtfToUniChar(pattern, &endChar); # proper advance. - string match {a[a\u4e4fc]c} "acc" + Wrapper_Tcl_StringMatch {a[a\u4e4fc]c} "acc" } 1 test util-5.20 {Tcl_StringMatch} { - string match {a[xyz]c} abc + Wrapper_Tcl_StringMatch {a[xyz]c} abc } 0 test util-5.21 {Tcl_StringMatch} { - string match {12[2-7]45} 12345 + Wrapper_Tcl_StringMatch {12[2-7]45} 12345 } 1 test util-5.22 {Tcl_StringMatch: UTF-8 range} { - string match "\[\u4e00-\u4e4f]" "0" + Wrapper_Tcl_StringMatch "\[\u4e00-\u4e4f]" "0" } 0 test util-5.23 {Tcl_StringMatch: UTF-8 range} { - string match "\[\u4e00-\u4e4f]" "\u4e33" + Wrapper_Tcl_StringMatch "\[\u4e00-\u4e4f]" "\u4e33" } 1 test util-5.24 {Tcl_StringMatch: UTF-8 range} { - string match "\[\u4e00-\u4e4f]" "\uff08" + Wrapper_Tcl_StringMatch "\[\u4e00-\u4e4f]" "\uff08" } 0 test util-5.25 {Tcl_StringMatch} { - string match {12[ab2-4cd]45} 12345 + Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12345 } 1 test util-5.26 {Tcl_StringMatch} { - string match {12[ab2-4cd]45} 12b45 + Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12b45 } 1 test util-5.27 {Tcl_StringMatch} { - string match {12[ab2-4cd]45} 12d45 + Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12d45 } 1 test util-5.28 {Tcl_StringMatch} { - string match {12[ab2-4cd]45} 12145 + Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12145 } 0 test util-5.29 {Tcl_StringMatch} { - string match {12[ab2-4cd]45} 12545 + Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12545 } 0 test util-5.30 {Tcl_StringMatch: forwards range} { - string match {[k-w]} "z" + Wrapper_Tcl_StringMatch {[k-w]} "z" } 0 test util-5.31 {Tcl_StringMatch: forwards range} { - string match {[k-w]} "w" + Wrapper_Tcl_StringMatch {[k-w]} "w" } 1 test util-5.32 {Tcl_StringMatch: forwards range} { - string match {[k-w]} "r" + Wrapper_Tcl_StringMatch {[k-w]} "r" } 1 test util-5.33 {Tcl_StringMatch: forwards range} { - string match {[k-w]} "k" + Wrapper_Tcl_StringMatch {[k-w]} "k" } 1 test util-5.34 {Tcl_StringMatch: forwards range} { - string match {[k-w]} "a" + Wrapper_Tcl_StringMatch {[k-w]} "a" } 0 test util-5.35 {Tcl_StringMatch: reverse range} { - string match {[w-k]} "z" + Wrapper_Tcl_StringMatch {[w-k]} "z" } 0 test util-5.36 {Tcl_StringMatch: reverse range} { - string match {[w-k]} "w" + Wrapper_Tcl_StringMatch {[w-k]} "w" } 1 test util-5.37 {Tcl_StringMatch: reverse range} { - string match {[w-k]} "r" + Wrapper_Tcl_StringMatch {[w-k]} "r" } 1 test util-5.38 {Tcl_StringMatch: reverse range} { - string match {[w-k]} "k" + Wrapper_Tcl_StringMatch {[w-k]} "k" } 1 test util-5.39 {Tcl_StringMatch: reverse range} { - string match {[w-k]} "a" + Wrapper_Tcl_StringMatch {[w-k]} "a" } 0 test util-5.40 {Tcl_StringMatch: skip correct number of ']'} { - string match {[A-]x} Ax + Wrapper_Tcl_StringMatch {[A-]x} Ax } 0 test util-5.41 {Tcl_StringMatch: skip correct number of ']'} { - string match {[A-]]x} Ax + Wrapper_Tcl_StringMatch {[A-]]x} Ax } 1 test util-5.42 {Tcl_StringMatch: skip correct number of ']'} { - string match {[A-]]x} \ue1x + Wrapper_Tcl_StringMatch {[A-]]x} \ue1x } 0 test util-5.43 {Tcl_StringMatch: skip correct number of ']'} { - string match \[A-]\ue1]x \ue1x + Wrapper_Tcl_StringMatch \[A-]\ue1]x \ue1x } 1 test util-5.44 {Tcl_StringMatch: skip correct number of ']'} { - string match {[A-]h]x} hx + Wrapper_Tcl_StringMatch {[A-]h]x} hx } 1 test util-5.45 {Tcl_StringMatch} { # if (*pattern == '\0') # badly formed pattern, still treats as a set - string match {[a} a + Wrapper_Tcl_StringMatch {[a} a } 1 test util-5.46 {Tcl_StringMatch} { - string match {a\*b} a*b + Wrapper_Tcl_StringMatch {a\*b} a*b } 1 test util-5.47 {Tcl_StringMatch} { - string match {a\*b} ab + Wrapper_Tcl_StringMatch {a\*b} ab } 0 test util-5.48 {Tcl_StringMatch} { - string match {a\*\?\[\]\\\x} "a*?\[\]\\x" + Wrapper_Tcl_StringMatch {a\*\?\[\]\\\x} "a*?\[\]\\x" } 1 test util-5.49 {Tcl_StringMatch} { - string match ** "" + Wrapper_Tcl_StringMatch ** "" } 1 test util-5.50 {Tcl_StringMatch} { - string match *. "" + Wrapper_Tcl_StringMatch *. "" } 0 test util-5.51 {Tcl_StringMatch} { - string match "" "" + Wrapper_Tcl_StringMatch "" "" } 1 test util-6.1 {Tcl_PrintDouble - using tcl_precision} { -- cgit v0.12