Merge 8.7

Add function Tcl_UniCharFold(). It's the same as Tcl_UniCharToLower() for now, but that will change.
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-22 14:36:34 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-22 14:36:34 (GMT)
commit: bc13dc596e5c32c6852325bf9b32eada7f8c7120 (patch)
tree: a6a4759d6663fff6858bc0e8d458131aab901bf2
parent: 8c514a610182c73e334bb61d9030238d66e02bbd (diff)
parent: 6de32c896abb44a00ad7368892924e9c9de5db11 (diff)
download: tcl-bc13dc596e5c32c6852325bf9b32eada7f8c7120.zip
tcl-bc13dc596e5c32c6852325bf9b32eada7f8c7120.tar.gz
tcl-bc13dc596e5c32c6852325bf9b32eada7f8c7120.tar.bz2
13 files changed, 104 insertions, 43 deletions
diff --git a/doc/ToUpper.3 b/doc/ToUpper.3
index fd9ddfb..5456538 100644
--- a/doc/ToUpper.3
+++ b/doc/ToUpper.3
@@ -8,7 +8,7 @@
 .so man.macros
 .BS
 .SH NAME
-Tcl_UniCharToUpper, Tcl_UniCharToLower, Tcl_UniCharToTitle, Tcl_UtfToUpper, Tcl_UtfToLower, Tcl_UtfToTitle \- routines for manipulating the case of Unicode characters and UTF-8 strings
+Tcl_UniCharToUpper, Tcl_UniCharToLower, Tcl_UniCharFold, Tcl_UniCharToTitle, Tcl_UtfToUpper, Tcl_UtfToLower, Tcl_UtfToTitle \- routines for manipulating the case of Unicode characters and UTF-8 strings
 .SH SYNOPSIS
 .nf
 \fB#include <tcl.h>\fR
@@ -17,6 +17,9 @@ int
 \fBTcl_UniCharToUpper\fR(\fIch\fR)
 .sp
 int
+\fBTcl_UniCharFold\fR(\fIch\fR)
+.sp
+int
 \fBTcl_UniCharToLower\fR(\fIch\fR)
 .sp
 int
@@ -52,6 +55,11 @@ If \fIch\fR represents an upper-case character,
 character.  If no lower-case character is defined, it returns the
 character unchanged.
 .PP
+If \fIch\fR represents an upper-case or lower-case character,
+\fBTcl_UniCharFold\fR returns the corresponding folded
+character.  If no upper-case or lower-case character is defined, it returns the
+character unchanged.
+.PP
 If \fIch\fR represents a lower-case character,
 \fBTcl_UniCharToTitle\fR returns the corresponding title-case
 character.  If no title-case character is defined, it returns the
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index c90dd64..cc4681b 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -1269,7 +1269,7 @@ casecmp(
     size_t len)			/* exact length of comparison */
 {
     for (; len > 0; len--, x++, y++) {
-	if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
+	if ((*x!=*y) && (Tcl_UniCharFold(*x) != Tcl_UniCharFold(*y))) {
 	    return 1;
 	}
     }
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 4ccedd1..fb967d6 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -2412,6 +2412,9 @@ declare 650 {
 declare 651 {
     const char *Tcl_UtfPrev(const char *src, const char *start)
 }
+declare 652 {
+    int Tcl_UniCharFold(int ch)
+}
 
 # ----- BASELINE -- FOR -- 8.7.0 ----- #
 
diff --git a/generic/tcl.h b/generic/tcl.h
index 369a894..02ef01e 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2114,8 +2114,8 @@ typedef struct Tcl_EncodingType {
  * The maximum number of bytes that are necessary to represent a single
  * Unicode character in UTF-8. The valid values are 3 and 4
  * (or perhaps 1 if we want to support a non-unicode enabled core). If 3,
- * then Tcl_UniChar must be 2-bytes in size (UCS-2) (the default). If > 3,
- * then Tcl_UniChar must be 4-bytes in size (UCS-4). At this time UCS-2 mode
+ * then Tcl_UniChar must be 2-bytes in size (UTF-16) (the default). If > 3,
+ * then Tcl_UniChar must be 4-bytes in size (UCS-4). At this time UTF-16 mode
  * is the default and recommended mode.
  */
 
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index 60331f5..63677a9 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -4776,8 +4776,8 @@ DictionaryCompare(
 	     * other interesting punctuations occur).
 	     */
 
-	    uniLeftLower = Tcl_UniCharToLower(uniLeft);
-	    uniRightLower = Tcl_UniCharToLower(uniRight);
+	    uniLeftLower = Tcl_UniCharFold(uniLeft);
+	    uniRightLower = Tcl_UniCharFold(uniRight);
 	} else {
 	    diff = UCHAR(*left) - UCHAR(*right);
 	    break;
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 8f0465d..8efdb27 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -629,10 +629,10 @@ Tcl_RegsubObjCmd(
 		wlen = 0;
 	    }
 	} else {
-	    wsrclc = Tcl_UniCharToLower(*wsrc);
+	    wsrclc = Tcl_UniCharFold(*wsrc);
 	    for (p = wfirstChar = wstring; wstring < wend; wstring++) {
 		if ((*wstring == *wsrc ||
-			(nocase && Tcl_UniCharToLower(*wstring)==wsrclc)) &&
+			(nocase && Tcl_UniCharFold(*wstring)==wsrclc)) &&
 			(slen==1 || (strCmpFn(wstring, wsrc,
 				(unsigned long) slen) == 0))) {
 		    if (numMatches == 0) {
@@ -2095,10 +2095,10 @@ StringMapCmd(
 	    ustring1 = end;
 	} else {
 	    mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
-	    u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
+	    u2lc = (nocase ? Tcl_UniCharFold(*ustring2) : 0);
 	    for (; ustring1 < end; ustring1++) {
 		if (((*ustring1 == *ustring2) ||
-			(nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) &&
+			(nocase&&Tcl_UniCharFold(*ustring1)==u2lc)) &&
 			(length2==1 || strCmpFn(ustring1, ustring2,
 				(unsigned long) length2) == 0)) {
 		    if (p != ustring1) {
@@ -2133,7 +2133,7 @@ StringMapCmd(
 	    mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
 		    mapLens+index);
 	    if (nocase && ((index % 2) == 0)) {
-		u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
+		u2lc[index/2] = Tcl_UniCharFold(*mapStrings[index]);
 	    }
 	}
 	for (p = ustring1; ustring1 < end; ustring1++) {
@@ -2145,7 +2145,7 @@ StringMapCmd(
 		ustring2 = mapStrings[index];
 		length2 = mapLens[index];
 		if ((length2 > 0) && ((*ustring1 == *ustring2) || (nocase &&
-			(Tcl_UniCharToLower(*ustring1) == u2lc[index/2]))) &&
+			(Tcl_UniCharFold(*ustring1) == u2lc[index/2]))) &&
 			/* Restrict max compare length. */
 			(end-ustring1 >= length2) && ((length2 == 1) ||
 			!strCmpFn(ustring2, ustring1, length2))) {
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 7c1b22b..72a8967 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1927,6 +1927,8 @@ EXTERN int		Tcl_UtfCharComplete(const char *src, int length);
 EXTERN const char *	Tcl_UtfNext(const char *src);
 /* 651 */
 EXTERN const char *	Tcl_UtfPrev(const char *src, const char *start);
+/* 652 */
+EXTERN int		Tcl_UniCharFold(int ch);
 
 typedef struct {
     const struct TclPlatStubs *tclPlatStubs;
@@ -2614,6 +2616,7 @@ typedef struct TclStubs {
     int (*tcl_UtfCharComplete) (const char *src, int length); /* 649 */
     const char * (*tcl_UtfNext) (const char *src); /* 650 */
     const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 651 */
+    int (*tcl_UniCharFold) (int ch); /* 652 */
 } TclStubs;
 
 extern const TclStubs *tclStubsPtr;
@@ -3948,6 +3951,8 @@ extern const TclStubs *tclStubsPtr;
 	(tclStubsPtr->tcl_UtfNext) /* 650 */
 #define Tcl_UtfPrev \
 	(tclStubsPtr->tcl_UtfPrev) /* 651 */
+#define Tcl_UniCharFold \
+	(tclStubsPtr->tcl_UniCharFold) /* 652 */
 
 #endif /* defined(USE_TCL_STUBS) */
 
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 9aa7301..f597f5c 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -1901,6 +1901,7 @@ const TclStubs tclStubs = {
     Tcl_UtfCharComplete, /* 649 */
     Tcl_UtfNext, /* 650 */
     Tcl_UtfPrev, /* 651 */
+    Tcl_UniCharFold, /* 652 */
 };
 
 /* !END!: Do not edit above this line. */
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 4899c84..a3d929e 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -23,7 +23,11 @@
 #   define TCL_NO_DEPRECATED
 #endif
 #include "tclInt.h"
-#include "tclTomMath.h"
+#ifdef TCL_WITH_EXTERNAL_TOMMATH
+#   include "tommath.h"
+#else
+#   include "tclTomMath.h"
+#endif
 #include "tclOO.h"
 #include <math.h>
 
@@ -449,9 +453,11 @@ Tcltest_Init(
     if (Tcl_InitStubs(interp, "8.5-", 0) == NULL) {
 	return TCL_ERROR;
     }
+#ifndef TCL_WITH_EXTERNAL_TOMMATH
     if (Tcl_TomMath_InitStubs(interp, "8.5-") == NULL) {
 	return TCL_ERROR;
     }
+#endif
     if (Tcl_OOInitStubs(interp) == NULL) {
 	return TCL_ERROR;
     }
diff --git a/generic/tclTomMath.h b/generic/tclTomMath.h
index e9257a0..0d2d320 100644
--- a/generic/tclTomMath.h
+++ b/generic/tclTomMath.h
@@ -2,13 +2,15 @@
 #define BN_TCL_H_
 
 #ifdef MP_NO_STDINT
-#ifdef HAVE_STDINT_H
-#  include <stdint.h>
+#   ifdef HAVE_STDINT_H
+#	include <stdint.h>
 #else
-#  include "../compat/stdint.h"
+#	include "../compat/stdint.h"
+#   endif
 #endif
+#ifndef BN_H_ /* If BN_H_ already defined, don't try to include tommath.h again. */
+#   include "tommath.h"
 #endif
-#include "tommath.h"
 #include "tclTomMathDecls.h"
 
 #endif
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 155b8f9..fd6ec1b 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -55,7 +55,7 @@
 #define UNICODE_SELF	0x80
 
 /*
- * The following structures are used when mapping between Unicode (UCS-2) and
+ * The following structures are used when mapping between Unicode and
  * UTF-8.
  */
 
@@ -1557,8 +1557,8 @@ Tcl_UtfNcasecmp(
 		return -ch2;
 	    }
 #endif
-	    ch1 = Tcl_UniCharToLower(ch1);
-	    ch2 = Tcl_UniCharToLower(ch2);
+	    ch1 = Tcl_UniCharFold(ch1);
+	    ch2 = Tcl_UniCharFold(ch2);
 	    if (ch1 != ch2) {
 		return (ch1 - ch2);
 	    }
@@ -1652,8 +1652,8 @@ TclUtfCasecmp(
 		return -ch2;
 	    }
 #endif
-	    ch1 = Tcl_UniCharToLower(ch1);
-	    ch2 = Tcl_UniCharToLower(ch2);
+	    ch1 = Tcl_UniCharFold(ch1);
+	    ch2 = Tcl_UniCharFold(ch2);
 	    if (ch1 != ch2) {
 		return ch1 - ch2;
 	    }
@@ -1725,6 +1725,38 @@ Tcl_UniCharToLower(
     /* Clear away extension bits, if any */
     return ch & 0x1FFFFF;
 }
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharFold --
+ *
+ *	Compute the lowercase equivalent of the given Unicode character.
+ *
+ * Results:
+ *	Returns the lowercase Unicode character.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharFold(
+    int ch)			/* Unicode character to convert. */
+{
+    if (!UNICODE_OUT_OF_RANGE(ch)) {
+	int info = GetUniCharInfo(ch);
+	int mode = GetCaseType(info);
+
+	if ((mode & 0x02) && (mode != 0x7)) {
+	    ch += GetDelta(info);
+	}
+    }
+    /* Clear away extension bits, if any */
+    return ch & 0x1FFFFF;
+}
 
 /*
  *----------------------------------------------------------------------
@@ -1866,8 +1898,8 @@ Tcl_UniCharNcasecmp(
 {
     for ( ; numChars != 0; numChars--, ucs++, uct++) {
 	if (*ucs != *uct) {
-	    Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs);
-	    Tcl_UniChar lct = Tcl_UniCharToLower(*uct);
+	    Tcl_UniChar lcs = Tcl_UniCharFold(*ucs);
+	    Tcl_UniChar lct = Tcl_UniCharFold(*uct);
 
 	    if (lcs != lct) {
 		return (lcs - lct);
@@ -2255,7 +2287,7 @@ Tcl_UniCharCaseMatch(
 		return 1;
 	    }
 	    if (nocase) {
-		p = Tcl_UniCharToLower(p);
+		p = Tcl_UniCharFold(p);
 	    }
 	    while (1) {
 		/*
@@ -2307,13 +2339,13 @@ Tcl_UniCharCaseMatch(
 	    Tcl_UniChar startChar, endChar;
 
 	    uniPattern++;
-	    ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr);
+	    ch1 = (nocase ? Tcl_UniCharFold(*uniStr) : *uniStr);
 	    uniStr++;
 	    while (1) {
 		if ((*uniPattern == ']') || (*uniPattern == 0)) {
 		    return 0;
 		}
-		startChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
+		startChar = (nocase ? Tcl_UniCharFold(*uniPattern)
 			: *uniPattern);
 		uniPattern++;
 		if (*uniPattern == '-') {
@@ -2321,7 +2353,7 @@ Tcl_UniCharCaseMatch(
 		    if (*uniPattern == 0) {
 			return 0;
 		    }
-		    endChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
+		    endChar = (nocase ? Tcl_UniCharFold(*uniPattern)
 			    : *uniPattern);
 		    uniPattern++;
 		    if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2363,8 +2395,8 @@ Tcl_UniCharCaseMatch(
 	 */
 
 	if (nocase) {
-	    if (Tcl_UniCharToLower(*uniStr) !=
-		    Tcl_UniCharToLower(*uniPattern)) {
+	    if (Tcl_UniCharFold(*uniStr) !=
+		    Tcl_UniCharFold(*uniPattern)) {
 		return 0;
 	    }
 	} else if (*uniStr != *uniPattern) {
@@ -2447,7 +2479,7 @@ TclUniCharMatch(
 	    }
 	    p = *pattern;
 	    if (nocase) {
-		p = Tcl_UniCharToLower(p);
+		p = Tcl_UniCharFold(p);
 	    }
 	    while (1) {
 		/*
@@ -2459,7 +2491,7 @@ TclUniCharMatch(
 		if ((p != '[') && (p != '?') && (p != '\\')) {
 		    if (nocase) {
 			while ((string < stringEnd) && (p != *string)
-				&& (p != Tcl_UniCharToLower(*string))) {
+				&& (p != Tcl_UniCharFold(*string))) {
 			    string++;
 			}
 		    } else {
@@ -2500,20 +2532,20 @@ TclUniCharMatch(
 	    Tcl_UniChar ch1, startChar, endChar;
 
 	    pattern++;
-	    ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
+	    ch1 = (nocase ? Tcl_UniCharFold(*string) : *string);
 	    string++;
 	    while (1) {
 		if ((*pattern == ']') || (pattern == patternEnd)) {
 		    return 0;
 		}
-		startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
+		startChar = (nocase ? Tcl_UniCharFold(*pattern) : *pattern);
 		pattern++;
 		if (*pattern == '-') {
 		    pattern++;
 		    if (pattern == patternEnd) {
 			return 0;
 		    }
-		    endChar = (nocase ? Tcl_UniCharToLower(*pattern)
+		    endChar = (nocase ? Tcl_UniCharFold(*pattern)
 			    : *pattern);
 		    pattern++;
 		    if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2555,7 +2587,7 @@ TclUniCharMatch(
 	 */
 
 	if (nocase) {
-	    if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {
+	    if (Tcl_UniCharFold(*string) != Tcl_UniCharFold(*pattern)) {
 		return 0;
 	    }
 	} else if (*string != *pattern) {
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 46cfcdd..87d6aab 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -2208,7 +2208,7 @@ Tcl_StringCaseMatch(
 	    } else {
 		TclUtfToUCS4(pattern, &ch2);
 		if (nocase) {
-		    ch2 = Tcl_UniCharToLower(ch2);
+		    ch2 = Tcl_UniCharFold(ch2);
 		}
 	    }
 
@@ -2223,7 +2223,7 @@ Tcl_StringCaseMatch(
 		    if (nocase) {
 			while (*str) {
 			    charLen = TclUtfToUCS4(str, &ch1);
-			    if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
+			    if (ch2==ch1 || ch2==Tcl_UniCharFold(ch1)) {
 				break;
 			    }
 			    str += charLen;
@@ -2282,7 +2282,7 @@ Tcl_StringCaseMatch(
 	    } else {
 		str += TclUtfToUCS4(str, &ch1);
 		if (nocase) {
-		    ch1 = Tcl_UniCharToLower(ch1);
+		    ch1 = Tcl_UniCharFold(ch1);
 		}
 	    }
 	    while (1) {
@@ -2296,7 +2296,7 @@ Tcl_StringCaseMatch(
 		} else {
 		    pattern += TclUtfToUCS4(pattern, &startChar);
 		    if (nocase) {
-			startChar = Tcl_UniCharToLower(startChar);
+			startChar = Tcl_UniCharFold(startChar);
 		    }
 		}
 		if (*pattern == '-') {
@@ -2311,7 +2311,7 @@ Tcl_StringCaseMatch(
 		    } else {
 			pattern += TclUtfToUCS4(pattern, &endChar);
 			if (nocase) {
-			    endChar = Tcl_UniCharToLower(endChar);
+			    endChar = Tcl_UniCharFold(endChar);
 			}
 		    }
 		    if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2360,7 +2360,7 @@ Tcl_StringCaseMatch(
 	str += TclUtfToUCS4(str, &ch1);
 	pattern += TclUtfToUCS4(pattern, &ch2);
 	if (nocase) {
-	    if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) {
+	    if (Tcl_UniCharFold(ch1) != Tcl_UniCharFold(ch2)) {
 		return 0;
 	    }
 	} else if (ch1 != ch2) {
diff --git a/win/tclWinTest.c b/win/tclWinTest.c
index 5841509..91a3010 100644
--- a/win/tclWinTest.c
+++ b/win/tclWinTest.c
@@ -13,7 +13,11 @@
 #   define USE_TCL_STUBS
 #endif
 #include "tclInt.h"
-#include "tclTomMath.h"
+#ifdef TCL_WITH_EXTERNAL_TOMMATH
+#   include "tommath.h"
+#else
+#   include "tclTomMath.h"
+#endif
 
 /*
  * For TestplatformChmod on Windows
author	jan.nijtmans <nijtmans@users.sourceforge.net>	2020-05-22 14:36:34 (GMT)
committer	jan.nijtmans <nijtmans@users.sourceforge.net>	2020-05-22 14:36:34 (GMT)
commit	bc13dc596e5c32c6852325bf9b32eada7f8c7120 (patch)
tree	a6a4759d6663fff6858bc0e8d458131aab901bf2
parent	8c514a610182c73e334bb61d9030238d66e02bbd (diff)
parent	6de32c896abb44a00ad7368892924e9c9de5db11 (diff)
download	tcl-bc13dc596e5c32c6852325bf9b32eada7f8c7120.zip tcl-bc13dc596e5c32c6852325bf9b32eada7f8c7120.tar.gz tcl-bc13dc596e5c32c6852325bf9b32eada7f8c7120.tar.bz2