1 files changed, 114 insertions, 75 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b9e1226..e5497a4 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -59,7 +59,7 @@
  * UTF-8.
  */
 
-static const unsigned char totalBytes[256] = {
+static CONST unsigned char totalBytes[256] = {
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -231,13 +231,13 @@ Tcl_UniCharToUtf(
 
 char *
 Tcl_UniCharToUtfDString(
-    const Tcl_UniChar *uniStr,	/* Unicode string to convert to UTF-8. */
-    size_t uniLength,		/* Length of Unicode string in Tcl_UniChars
+    CONST Tcl_UniChar *uniStr,	/* Unicode string to convert to UTF-8. */
+    int uniLength,		/* Length of Unicode string in Tcl_UniChars
 				 * (must be >= 0). */
     Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
 				 * to this previously initialized DString. */
 {
-    const Tcl_UniChar *w, *wEnd;
+    CONST Tcl_UniChar *w, *wEnd;
     char *p, *string;
     int oldLength;
 
@@ -289,7 +289,7 @@ Tcl_UniCharToUtfDString(
 
 int
 Tcl_UtfToUniChar(
-    register const char *src,	/* The UTF-8 string. */
+    register CONST char *src,	/* The UTF-8 string. */
     register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
 				 * the UTF-8 string. */
 {
@@ -393,18 +393,18 @@ Tcl_UtfToUniChar(
 
 Tcl_UniChar *
 Tcl_UtfToUniCharDString(
-    const char *src,		/* UTF-8 string to convert to Unicode. */
-    size_t length,		/* Length of UTF-8 string in bytes, or
-				 * TCL_STRLEN for strlen(). */
+    CONST char *src,		/* UTF-8 string to convert to Unicode. */
+    int length,			/* Length of UTF-8 string in bytes, or -1 for
+				 * strlen(). */
     Tcl_DString *dsPtr)		/* Unicode representation of string is
 				 * appended to this previously initialized
 				 * DString. */
 {
     Tcl_UniChar *w, *wString;
-    const char *p, *end;
-    size_t oldLength;
+    CONST char *p, *end;
+    int oldLength;
 
-    if (length == TCL_STRLEN) {
+    if (length < 0) {
 	length = strlen(src);
     }
 
@@ -414,9 +414,8 @@ Tcl_UtfToUniCharDString(
      */
 
     oldLength = Tcl_DStringLength(dsPtr);
-/* TODO: fix overreach! */
     Tcl_DStringSetLength(dsPtr,
-	    (oldLength + length + 1) * sizeof(Tcl_UniChar));
+	    (int) ((oldLength + length + 1) * sizeof(Tcl_UniChar)));
     wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);
 
     w = wString;
@@ -427,7 +426,7 @@ Tcl_UtfToUniCharDString(
     }
     *w = '\0';
     Tcl_DStringSetLength(dsPtr,
-	    oldLength + ((char *) w - (char *) wString));
+	    (oldLength + ((char *) w - (char *) wString)));
 
     return wString;
 }
@@ -453,9 +452,9 @@ Tcl_UtfToUniCharDString(
 
 int
 Tcl_UtfCharComplete(
-    const char *src,		/* String to check if first few bytes contain
+    CONST char *src,		/* String to check if first few bytes contain
 				 * a complete UTF-8 character. */
-    size_t length)			/* Length of above string in bytes. */
+    int length)			/* Length of above string in bytes. */
 {
     int ch;
 
@@ -481,11 +480,11 @@ Tcl_UtfCharComplete(
  *---------------------------------------------------------------------------
  */
 
-size_t
+int
 Tcl_NumUtfChars(
-    register const char *src,	/* The UTF-8 string to measure. */
-    size_t length)		/* The length of the string in bytes, or
-				 * TCL_STRLEN for strlen(string). */
+    register CONST char *src,	/* The UTF-8 string to measure. */
+    int length)			/* The length of the string in bytes, or -1
+				 * for strlen(string). */
 {
     Tcl_UniChar ch;
     register Tcl_UniChar *chPtr = &ch;
@@ -499,7 +498,7 @@ Tcl_NumUtfChars(
      */
 
     i = 0;
-    if (length == TCL_STRLEN) {
+    if (length < 0) {
 	while (*src != '\0') {
 	    src += TclUtfToUniChar(src, chPtr);
 	    i++;
@@ -541,9 +540,9 @@ Tcl_NumUtfChars(
  *---------------------------------------------------------------------------
  */
 
-const char *
+CONST char *
 Tcl_UtfFindFirst(
-    const char *src,		/* The UTF-8 string to be searched. */
+    CONST char *src,		/* The UTF-8 string to be searched. */
     int ch)			/* The Tcl_UniChar to search for. */
 {
     int len;
@@ -580,14 +579,14 @@ Tcl_UtfFindFirst(
  *---------------------------------------------------------------------------
  */
 
-const char *
+CONST char *
 Tcl_UtfFindLast(
-    const char *src,		/* The UTF-8 string to be searched. */
+    CONST char *src,		/* The UTF-8 string to be searched. */
     int ch)			/* The Tcl_UniChar to search for. */
 {
     int len;
     Tcl_UniChar find;
-    const char *last;
+    CONST char *last;
 
     last = NULL;
     while (1) {
@@ -622,9 +621,9 @@ Tcl_UtfFindLast(
  *---------------------------------------------------------------------------
  */
 
-const char *
+CONST char *
 Tcl_UtfNext(
-    const char *src)		/* The current location in the string. */
+    CONST char *src)		/* The current location in the string. */
 {
     Tcl_UniChar ch;
 
@@ -652,13 +651,13 @@ Tcl_UtfNext(
  *---------------------------------------------------------------------------
  */
 
-const char *
+CONST char *
 Tcl_UtfPrev(
-    const char *src,		/* The current location in the string. */
-    const char *start)		/* Pointer to the beginning of the string, to
+    CONST char *src,		/* The current location in the string. */
+    CONST char *start)		/* Pointer to the beginning of the string, to
 				 * avoid going backwards too far. */
 {
-    const char *look;
+    CONST char *look;
     int i, byte;
 
     src--;
@@ -701,10 +700,10 @@ Tcl_UtfPrev(
 
 Tcl_UniChar
 Tcl_UniCharAtIndex(
-    register const char *src,	/* The UTF-8 string to dereference. */
-    register size_t index)	/* The position of the desired character. */
+    register CONST char *src,	/* The UTF-8 string to dereference. */
+    register int index)		/* The position of the desired character. */
 {
-    Tcl_UniChar ch = 0;
+    Tcl_UniChar ch;
 
     while (index >= 0) {
 	index--;
@@ -730,10 +729,10 @@ Tcl_UniCharAtIndex(
  *---------------------------------------------------------------------------
  */
 
-const char *
+CONST char *
 Tcl_UtfAtIndex(
-    register const char *src,	/* The UTF-8 string. */
-    register size_t index)	/* The position of the desired character. */
+    register CONST char *src,	/* The UTF-8 string. */
+    register int index)		/* The position of the desired character. */
 {
     Tcl_UniChar ch;
 
@@ -770,17 +769,18 @@ Tcl_UtfAtIndex(
  *---------------------------------------------------------------------------
  */
 
-size_t
+int
 Tcl_UtfBackslash(
-    const char *src,		/* Points to the backslash character of a
+    CONST char *src,		/* Points to the backslash character of a
 				 * backslash sequence. */
-    size_t *readPtr,		/* Fill in with number of characters read from
+    int *readPtr,		/* Fill in with number of characters read from
 				 * src, unless NULL. */
     char *dst)			/* Filled with the bytes represented by the
 				 * backslash sequence. */
 {
 #define LINE_LENGTH 128
-    size_t numRead, result;
+    int numRead;
+    int result;
 
     result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst);
     if (numRead == LINE_LENGTH) {
@@ -820,7 +820,7 @@ Tcl_UtfToUpper(
 {
     Tcl_UniChar ch, upChar;
     char *src, *dst;
-    size_t bytes;
+    int bytes;
 
     /*
      * Iterate over the string until we hit the terminating null.
@@ -838,7 +838,7 @@ Tcl_UtfToUpper(
 	 */
 
 	if (bytes < UtfCount(upChar)) {
-	    memcpy(dst, src, bytes);
+	    memcpy(dst, src, (size_t) bytes);
 	    dst += bytes;
 	} else {
 	    dst += Tcl_UniCharToUtf(upChar, dst);
@@ -873,7 +873,7 @@ Tcl_UtfToLower(
 {
     Tcl_UniChar ch, lowChar;
     char *src, *dst;
-    size_t bytes;
+    int bytes;
 
     /*
      * Iterate over the string until we hit the terminating null.
@@ -891,7 +891,7 @@ Tcl_UtfToLower(
 	 */
 
 	if (bytes < UtfCount(lowChar)) {
-	    memcpy(dst, src, bytes);
+	    memcpy(dst, src, (size_t) bytes);
 	    dst += bytes;
 	} else {
 	    dst += Tcl_UniCharToUtf(lowChar, dst);
@@ -927,7 +927,7 @@ Tcl_UtfToTitle(
 {
     Tcl_UniChar ch, titleChar, lowChar;
     char *src, *dst;
-    size_t bytes;
+    int bytes;
 
     /*
      * Capitalize the first character and then lowercase the rest of the
@@ -941,7 +941,7 @@ Tcl_UtfToTitle(
 	titleChar = Tcl_UniCharToTitle(ch);
 
 	if (bytes < UtfCount(titleChar)) {
-	    memcpy(dst, src, bytes);
+	    memcpy(dst, src, (size_t) bytes);
 	    dst += bytes;
 	} else {
 	    dst += Tcl_UniCharToUtf(titleChar, dst);
@@ -953,7 +953,7 @@ Tcl_UtfToTitle(
 	lowChar = Tcl_UniCharToLower(ch);
 
 	if (bytes < UtfCount(lowChar)) {
-	    memcpy(dst, src, bytes);
+	    memcpy(dst, src, (size_t) bytes);
 	    dst += bytes;
 	} else {
 	    dst += Tcl_UniCharToUtf(lowChar, dst);
@@ -983,9 +983,9 @@ Tcl_UtfToTitle(
 
 int
 TclpUtfNcmp2(
-    const char *cs,		/* UTF string to compare to ct. */
-    const char *ct,		/* UTF string cs is compared to. */
-    size_t numBytes)		/* Number of *bytes* to compare. */
+    CONST char *cs,		/* UTF string to compare to ct. */
+    CONST char *ct,		/* UTF string cs is compared to. */
+    unsigned long numBytes)	/* Number of *bytes* to compare. */
 {
     /*
      * We can't simply call 'memcmp(cs, ct, numBytes);' because we need to
@@ -1030,9 +1030,9 @@ TclpUtfNcmp2(
 
 int
 Tcl_UtfNcmp(
-    const char *cs,		/* UTF string to compare to ct. */
-    const char *ct,		/* UTF string cs is compared to. */
-    size_t numChars)		/* Number of UTF chars to compare. */
+    CONST char *cs,		/* UTF string to compare to ct. */
+    CONST char *ct,		/* UTF string cs is compared to. */
+    unsigned long numChars)	/* Number of UTF chars to compare. */
 {
     Tcl_UniChar ch1, ch2;
 
@@ -1078,9 +1078,9 @@ Tcl_UtfNcmp(
 
 int
 Tcl_UtfNcasecmp(
-    const char *cs,		/* UTF string to compare to ct. */
-    const char *ct,		/* UTF string cs is compared to. */
-    size_t numChars)		/* Number of UTF chars to compare. */
+    CONST char *cs,		/* UTF string to compare to ct. */
+    CONST char *ct,		/* UTF string cs is compared to. */
+    unsigned long numChars)	/* Number of UTF chars to compare. */
 {
     Tcl_UniChar ch1, ch2;
     while (numChars-- > 0) {
@@ -1105,6 +1105,46 @@ Tcl_UtfNcasecmp(
 /*
  *----------------------------------------------------------------------
  *
+ * TclUtfCasecmp --
+ *
+ *	Compare UTF chars of string cs to string ct case insensitively.
+ *	Replacement for strcasecmp in Tcl core, in places where UTF-8 should
+ *	be handled.
+ *
+ * Results:
+ *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclUtfCasecmp(
+    CONST char *cs,		/* NUL-terminated UTF string to compare to ct. */
+    CONST char *ct)		/* NUL-terminated UTF string cs is compared to. */
+{
+    while (*cs && *ct) {		/* Stop once either string reaches NUL. */
+	Tcl_UniChar ch1, ch2;
+
+	cs += TclUtfToUniChar(cs, &ch1);
+	ct += TclUtfToUniChar(ct, &ch2);
+	if (ch1 != ch2) {		/* Fold case only when raw chars differ. */
+	    ch1 = Tcl_UniCharToLower(ch1);
+	    ch2 = Tcl_UniCharToLower(ch2);
+	    if (ch1 != ch2) {
+		return ch1 - ch2;	/* Sign of difference gives the order. */
+	    }
+	}
+    }
+    return UCHAR(*cs) - UCHAR(*ct);	/* 0 only when both strings ended. */
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_UniCharToUpper --
  *
  *	Compute the uppercase equivalent of the given Unicode character.
@@ -1212,7 +1252,7 @@ Tcl_UniCharToTitle(
 
 int
 Tcl_UniCharLen(
-    const Tcl_UniChar *uniStr)	/* Unicode string to find length of. */
+    CONST Tcl_UniChar *uniStr)	/* Unicode string to find length of. */
 {
     int len = 0;
 
@@ -1242,9 +1282,9 @@ Tcl_UniCharLen(
 
 int
 Tcl_UniCharNcmp(
-    const Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
-    const Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
-    size_t numChars)		/* Number of unichars to compare. */
+    CONST Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
+    CONST Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
+    unsigned long numChars)	/* Number of unichars to compare. */
 {
 #ifdef WORDS_BIGENDIAN
     /*
@@ -1287,9 +1327,9 @@ Tcl_UniCharNcmp(
 
 int
 Tcl_UniCharNcasecmp(
-    const Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
-    const Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
-    size_t numChars)		/* Number of unichars to compare. */
+    CONST Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
+    CONST Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
+    unsigned long numChars)	/* Number of unichars to compare. */
 {
     for ( ; numChars != 0; numChars--, ucs++, uct++) {
 	if (*ucs != *uct) {
@@ -1514,9 +1554,8 @@ Tcl_UniCharIsSpace(
      */
 
     if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) {
-	return isspace(UCHAR(ch)); /* INTL: ISO space */
-    } else if ((Tcl_UniChar) ch == 0x0085 || (Tcl_UniChar) ch == 0x200b
-	    || (Tcl_UniChar) ch == 0x2060 || (Tcl_UniChar) ch == 0xfeff) {
+	return TclIsSpaceProc((char) ch);
+    } else if ((Tcl_UniChar) ch == 0x180e || (Tcl_UniChar) ch == 0x202f) {
 	return 1;
     } else {
 	return ((SPACE_BITS >> GetCategory(ch)) & 1);
@@ -1594,8 +1633,8 @@ Tcl_UniCharIsWordChar(
 
 int
 Tcl_UniCharCaseMatch(
-    const Tcl_UniChar *uniStr,	/* Unicode String. */
-    const Tcl_UniChar *uniPattern,
+    CONST Tcl_UniChar *uniStr,	/* Unicode String. */
+    CONST Tcl_UniChar *uniPattern,
 				/* Pattern, which may contain special
 				 * characters. */
     int nocase)			/* 0 for case sensitive, 1 for insensitive */
@@ -1782,14 +1821,14 @@ Tcl_UniCharCaseMatch(
 
 int
 TclUniCharMatch(
-    const Tcl_UniChar *string,	/* Unicode String. */
-    size_t strLen,			/* Length of String */
-    const Tcl_UniChar *pattern,	/* Pattern, which may contain special
+    CONST Tcl_UniChar *string,	/* Unicode String. */
+    int strLen,			/* Length of String */
+    CONST Tcl_UniChar *pattern,	/* Pattern, which may contain special
 				 * characters. */
-    size_t ptnLen,			/* Length of Pattern */
+    int ptnLen,			/* Length of Pattern */
     int nocase)			/* 0 for case sensitive, 1 for insensitive */
 {
-    const Tcl_UniChar *stringEnd, *patternEnd;
+    CONST Tcl_UniChar *stringEnd, *patternEnd;
     Tcl_UniChar p;
 
     stringEnd = string + strLen;