From 75c6074fa9ed66a5fe5ac9afbd633cf0207e0899 Mon Sep 17 00:00:00 2001
From: dkf <donal.k.fellows@manchester.ac.uk>
Date: Wed, 2 Jan 2002 13:52:03 +0000
Subject: Fixed fault with case-insensitive string matching (Bug#233257) and
 rewrote some tests to test what they claimed to be testing.

---
 ChangeLog         |  12 ++++++
 generic/tclUtf.c  |   5 ++-
 generic/tclUtil.c |  27 ++++++++++----
 tests/string.test |   6 ++-
 tests/util.test   | 108 ++++++++++++++++++++++++++++--------------------------
 5 files changed, 97 insertions(+), 61 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index b6971fa..34a3b74 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2002-01-02  Donal K. Fellows  <fellowsd@cs.man.ac.uk>
+
+	* tests/util.test (Wrapper_Tcl_StringMatch,util-5.*): Rewrote so
+	the test is performed with the right internal function since
+	[string match] no longer uses Tcl_StringCaseMatch internally.
+
+	* tests/string.test (string-11.51):
+	* generic/tclUtf.c (Tcl_UniCharCaseMatch):
+	* generic/tclUtil.c (Tcl_StringCaseMatch): Fault with matching
+	case-insensitive non-ASCII patterns containing upper case
+	characters.  [Bug #233257]
+
 2001-12-28  Jeff Hobbs  <jeffh@ActiveState.com>
 
 	* library/init.tcl: make sure env(COMSPEC) on Windows is executed
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 17990db..5bdf557 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclUtf.c,v 1.19 2001/10/16 05:31:19 dgp Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.20 2002/01/02 13:52:04 dkf Exp $
  */
 
 #include "tclInt.h"
@@ -1691,6 +1691,9 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)
 	    if (p == 0) {
 		return 1;
 	    }
+	    if (nocase) {
+		p = Tcl_UniCharToLower(p);
+	    }
 	    while (1) {
 		/*
 		 * Optimization for matching - cruise through the string
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 11134be..52a5566 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -11,7 +11,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- *  RCS: @(#) $Id: tclUtil.c,v 1.26 2001/11/21 02:36:21 hobbs Exp $
+ *  RCS: @(#) $Id: tclUtil.c,v 1.27 2002/01/02 13:52:04 dkf Exp $
  */
 
 #include "tclInt.h"
@@ -1227,6 +1227,10 @@ Tcl_StringCaseMatch(string, pattern, nocase)
 	    if (p == '\0') {
 		return 1;
 	    }
+	    Tcl_UtfToUniChar(pattern, &ch2);
+	    if (nocase) {
+		ch2 = Tcl_UniCharToLower(ch2);
+	    }
 	    while (1) {
 		/*
 		 * Optimization for matching - cruise through the string
@@ -1235,16 +1239,25 @@ Tcl_StringCaseMatch(string, pattern, nocase)
 		 */
 		if ((p != '[') && (p != '?') && (p != '\\')) {
 		    if (nocase) {
-			while (*string && (p != *string)) {
-			    ch2 = Tcl_UtfToUniChar(string, &ch1);
-			    if (p == Tcl_UniCharToLower(ch1)) {
+			while (*string) {
+			    int charLen = Tcl_UtfToUniChar(string, &ch1);
+			    if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
 				break;
 			    }
-			    string += ch2;
+			    string += charLen;
 			}
 		    } else {
-			while (*string && (p != *string)) {
-			    string += Tcl_UtfToUniChar(string, &ch1);
+			/*
+			 * There's no point in trying to make this code
+			 * shorter, as the number of bytes you want to
+			 * compare each time is non-constant.
+			 */
+			while (*string) {
+			    int charLen = Tcl_UtfToUniChar(string, &ch1);
+			    if (ch2 == ch1) {
+				break;
+			    }
+			    string += charLen;
 			}
 		    }
 		}
diff --git a/tests/string.test b/tests/string.test
index 15b88d7..2ab91b2 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -12,7 +12,7 @@
 # See the file "license.terms" for information on usage and redistribution
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 #
-# RCS: @(#) $Id: string.test,v 1.30 2001/11/14 23:16:36 hobbs Exp $
+# RCS: @(#) $Id: string.test,v 1.31 2002/01/02 13:52:04 dkf Exp $
 
 if {[lsearch [namespace children] ::tcltest] == -1} {
     package require tcltest
@@ -855,6 +855,10 @@ test string-11.49 {string match, *special case} {
 test string-11.50 {string match, *special case} {
     string match "\\" "\\"
 } 0
+test string-11.51 {string match; *, -nocase and UTF-8} {
+    string match -nocase [binary format I 717316707] \
+	    [binary format I 2028036707]
+} 1
 
 
 test string-12.1 {string range} {
diff --git a/tests/util.test b/tests/util.test
index d39a6da..fe94732 100644
--- a/tests/util.test
+++ b/tests/util.test
@@ -7,7 +7,7 @@
 # See the file "license.terms" for information on usage and redistribution
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 #
-# RCS: @(#) $Id: util.test,v 1.9 2001/09/19 08:52:46 dkf Exp $
+# RCS: @(#) $Id: util.test,v 1.10 2002/01/02 13:52:04 dkf Exp $
 
 if {[lsearch [namespace children] ::tcltest] == -1} {
     package require tcltest
@@ -67,180 +67,184 @@ test util-4.6 {Tcl_ConcatObj - utf-8 sequence with "whitespace" char} {
     concat \xe0
 } \xe0
 
+proc Wrapper_Tcl_StringMatch {pattern string} {
+    # Forces use of Tcl_StringMatch, not Tcl_UniCharCaseMatch
+    switch -glob -- $string $pattern {return 1} default {return 0}
+}
 test util-5.1 {Tcl_StringMatch} {
-    string match ab*c abc
+    Wrapper_Tcl_StringMatch ab*c abc
 } 1
 test util-5.2 {Tcl_StringMatch} {
-    string match ab**c abc
+    Wrapper_Tcl_StringMatch ab**c abc
 } 1
 test util-5.3 {Tcl_StringMatch} {
-    string match ab* abcdef
+    Wrapper_Tcl_StringMatch ab* abcdef
 } 1
 test util-5.4 {Tcl_StringMatch} {
-    string match *c abc
+    Wrapper_Tcl_StringMatch *c abc
 } 1
 test util-5.5 {Tcl_StringMatch} {
-    string match *3*6*9 0123456789
+    Wrapper_Tcl_StringMatch *3*6*9 0123456789
 } 1
 test util-5.6 {Tcl_StringMatch} {
-    string match *3*6*9 01234567890
+    Wrapper_Tcl_StringMatch *3*6*9 01234567890
 } 0
 test util-5.7 {Tcl_StringMatch: UTF-8} {
-    string match *u \u4e4fu
+    Wrapper_Tcl_StringMatch *u \u4e4fu
 } 1
 test util-5.8 {Tcl_StringMatch} {
-    string match a?c abc
+    Wrapper_Tcl_StringMatch a?c abc
 } 1
 test util-5.9 {Tcl_StringMatch: UTF-8} {
     # skip one character in string
 
-    string match a?c a\u4e4fc
+    Wrapper_Tcl_StringMatch a?c a\u4e4fc
 } 1
 test util-5.10 {Tcl_StringMatch} {
-    string match a??c abc
+    Wrapper_Tcl_StringMatch a??c abc
 } 0
 test util-5.11 {Tcl_StringMatch} {
-    string match ?1??4???8? 0123456789
+    Wrapper_Tcl_StringMatch ?1??4???8? 0123456789
 } 1
 test util-5.12 {Tcl_StringMatch} {
-    string match {[abc]bc} abc
+    Wrapper_Tcl_StringMatch {[abc]bc} abc
 } 1
 test util-5.13 {Tcl_StringMatch: UTF-8} {
     # string += Tcl_UtfToUniChar(string, &ch);
 
-    string match "\[\u4e4fxy\]bc" "\u4e4fbc"
+    Wrapper_Tcl_StringMatch "\[\u4e4fxy\]bc" "\u4e4fbc"
 } 1
 test util-5.14 {Tcl_StringMatch} {
     # if ((*pattern == ']') || (*pattern == '\0'))
     # badly formed pattern
 
-    string match {[]} {[]}
+    Wrapper_Tcl_StringMatch {[]} {[]}
 } 0
 test util-5.15 {Tcl_StringMatch} {
     # if ((*pattern == ']') || (*pattern == '\0'))
     # badly formed pattern
 
-    string match {[} {[}
+    Wrapper_Tcl_StringMatch {[} {[}
 } 0
 test util-5.16 {Tcl_StringMatch} {
-    string match {a[abc]c} abc
+    Wrapper_Tcl_StringMatch {a[abc]c} abc
 } 1
 test util-5.17 {Tcl_StringMatch: UTF-8} {
     # pattern += Tcl_UtfToUniChar(pattern, &endChar);
     # get 1 UTF-8 character
 
-    string match "a\[a\u4e4fc]c" "a\u4e4fc"
+    Wrapper_Tcl_StringMatch "a\[a\u4e4fc]c" "a\u4e4fc"
 } 1
 test util-5.18 {Tcl_StringMatch: UTF-8} {
     # pattern += Tcl_UtfToUniChar(pattern, &endChar);
     # proper advance: wrong answer would match on UTF trail byte of \u4e4f
 
-    string match {a[a\u4e4fc]c} [bytestring a\u008fc]
+    Wrapper_Tcl_StringMatch {a[a\u4e4fc]c} [bytestring a\u008fc]
 } 0
 test util-5.19 {Tcl_StringMatch: UTF-8} {
     # pattern += Tcl_UtfToUniChar(pattern, &endChar);
     # proper advance.
 
-    string match {a[a\u4e4fc]c} "acc"
+    Wrapper_Tcl_StringMatch {a[a\u4e4fc]c} "acc"
 } 1
 test util-5.20 {Tcl_StringMatch} {
-    string match {a[xyz]c} abc
+    Wrapper_Tcl_StringMatch {a[xyz]c} abc
 } 0
 test util-5.21 {Tcl_StringMatch} {
-    string match {12[2-7]45} 12345
+    Wrapper_Tcl_StringMatch {12[2-7]45} 12345
 } 1
 test util-5.22 {Tcl_StringMatch: UTF-8 range} {
-    string match "\[\u4e00-\u4e4f]" "0"
+    Wrapper_Tcl_StringMatch "\[\u4e00-\u4e4f]" "0"
 } 0
 test util-5.23 {Tcl_StringMatch: UTF-8 range} {
-    string match "\[\u4e00-\u4e4f]" "\u4e33"
+    Wrapper_Tcl_StringMatch "\[\u4e00-\u4e4f]" "\u4e33"
 } 1
 test util-5.24 {Tcl_StringMatch: UTF-8 range} {
-    string match "\[\u4e00-\u4e4f]" "\uff08"
+    Wrapper_Tcl_StringMatch "\[\u4e00-\u4e4f]" "\uff08"
 } 0
 test util-5.25 {Tcl_StringMatch} {
-    string match {12[ab2-4cd]45} 12345
+    Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12345
 } 1
 test util-5.26 {Tcl_StringMatch} {
-    string match {12[ab2-4cd]45} 12b45
+    Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12b45
 } 1
 test util-5.27 {Tcl_StringMatch} {
-    string match {12[ab2-4cd]45} 12d45
+    Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12d45
 } 1
 test util-5.28 {Tcl_StringMatch} {
-    string match {12[ab2-4cd]45} 12145
+    Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12145
 } 0
 test util-5.29 {Tcl_StringMatch} {
-    string match {12[ab2-4cd]45} 12545
+    Wrapper_Tcl_StringMatch {12[ab2-4cd]45} 12545
 } 0
 test util-5.30 {Tcl_StringMatch: forwards range} {
-    string match {[k-w]} "z"
+    Wrapper_Tcl_StringMatch {[k-w]} "z"
 } 0
 test util-5.31 {Tcl_StringMatch: forwards range} {
-    string match {[k-w]} "w"
+    Wrapper_Tcl_StringMatch {[k-w]} "w"
 } 1
 test util-5.32 {Tcl_StringMatch: forwards range} {
-    string match {[k-w]} "r"
+    Wrapper_Tcl_StringMatch {[k-w]} "r"
 } 1
 test util-5.33 {Tcl_StringMatch: forwards range} {
-    string match {[k-w]} "k"
+    Wrapper_Tcl_StringMatch {[k-w]} "k"
 } 1
 test util-5.34 {Tcl_StringMatch: forwards range} {
-    string match {[k-w]} "a"
+    Wrapper_Tcl_StringMatch {[k-w]} "a"
 } 0
 test util-5.35 {Tcl_StringMatch: reverse range} {
-    string match {[w-k]} "z"
+    Wrapper_Tcl_StringMatch {[w-k]} "z"
 } 0
 test util-5.36 {Tcl_StringMatch: reverse range} {
-    string match {[w-k]} "w"
+    Wrapper_Tcl_StringMatch {[w-k]} "w"
 } 1
 test util-5.37 {Tcl_StringMatch: reverse range} {
-    string match {[w-k]} "r"
+    Wrapper_Tcl_StringMatch {[w-k]} "r"
 } 1
 test util-5.38 {Tcl_StringMatch: reverse range} {
-    string match {[w-k]} "k"
+    Wrapper_Tcl_StringMatch {[w-k]} "k"
 } 1
 test util-5.39 {Tcl_StringMatch: reverse range} {
-    string match {[w-k]} "a"
+    Wrapper_Tcl_StringMatch {[w-k]} "a"
 } 0
 test util-5.40 {Tcl_StringMatch: skip correct number of ']'} {
-    string match {[A-]x} Ax
+    Wrapper_Tcl_StringMatch {[A-]x} Ax
 } 0
 test util-5.41 {Tcl_StringMatch: skip correct number of ']'} {
-    string match {[A-]]x} Ax
+    Wrapper_Tcl_StringMatch {[A-]]x} Ax
 } 1
 test util-5.42 {Tcl_StringMatch: skip correct number of ']'} {
-    string match {[A-]]x} \ue1x
+    Wrapper_Tcl_StringMatch {[A-]]x} \ue1x
 } 0
 test util-5.43 {Tcl_StringMatch: skip correct number of ']'} {
-    string match \[A-]\ue1]x \ue1x
+    Wrapper_Tcl_StringMatch \[A-]\ue1]x \ue1x
 } 1
 test util-5.44 {Tcl_StringMatch: skip correct number of ']'} {
-    string match {[A-]h]x} hx
+    Wrapper_Tcl_StringMatch {[A-]h]x} hx
 } 1
 test util-5.45 {Tcl_StringMatch} {
     # if (*pattern == '\0')
     # badly formed pattern, still treats as a set
 
-    string match {[a} a
+    Wrapper_Tcl_StringMatch {[a} a
 } 1
 test util-5.46 {Tcl_StringMatch} {
-    string match {a\*b} a*b
+    Wrapper_Tcl_StringMatch {a\*b} a*b
 } 1
 test util-5.47 {Tcl_StringMatch} {
-    string match {a\*b} ab
+    Wrapper_Tcl_StringMatch {a\*b} ab
 } 0
 test util-5.48 {Tcl_StringMatch} {
-    string match {a\*\?\[\]\\\x} "a*?\[\]\\x"
+    Wrapper_Tcl_StringMatch {a\*\?\[\]\\\x} "a*?\[\]\\x"
 } 1
 test util-5.49 {Tcl_StringMatch} {
-    string match ** ""
+    Wrapper_Tcl_StringMatch ** ""
 } 1
 test util-5.50 {Tcl_StringMatch} {
-    string match *. ""
+    Wrapper_Tcl_StringMatch *. ""
 } 0
 test util-5.51 {Tcl_StringMatch} {
-    string match "" ""
+    Wrapper_Tcl_StringMatch "" ""
 } 1
 
 test util-6.1 {Tcl_PrintDouble - using tcl_precision} {
-- 
cgit v0.12