1 files changed, 657 insertions, 569 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b7a6277..b33bf6a 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -5,10 +5,8 @@
  *
  * Copyright (c) 1997-1998 Sun Microsystems, Inc.
  *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: tclUtf.c,v 1.30.2.2 2003/10/08 14:21:20 dkf Exp $
+ * See the file "license.terms" for information on usage and redistribution of
+ * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  */
 
 #include "tclInt.h"
@@ -20,49 +18,48 @@
 #include "tclUniData.c"
 
 /*
- * The following macros are used for fast character category tests.  The
- * x_BITS values are shifted right by the category value to determine whether
- * the given category is included in the set.
- */ 
+ * The following macros are used for fast character category tests. The x_BITS
+ * values are shifted right by the category value to determine whether the
+ * given category is included in the set.
+ */
 
 #define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \
-    | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER))
+	| (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1<<OTHER_LETTER))
+
+#define CONTROL_BITS ((1 << CONTROL) | (1 << FORMAT) | (1 << PRIVATE_USE))
 
 #define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER)
 
 #define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \
-    | (1 << PARAGRAPH_SEPARATOR))
-
-#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION)
+	| (1 << PARAGRAPH_SEPARATOR))
 
-#define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | \
-	    (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \
-	    (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \
-	    (1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \
-	    (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
-	    (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
-	    (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \
-	    (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \
-	    (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL))
+#define WORD_BITS (ALPHA_BITS | DIGIT_BITS | (1 << CONNECTOR_PUNCTUATION))
 
 #define PUNCT_BITS ((1 << CONNECTOR_PUNCTUATION) | \
-	    (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
-	    (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
-	    (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION))
+	(1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
+	(1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
+	(1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION))
+
+#define GRAPH_BITS (WORD_BITS | PUNCT_BITS | \
+	(1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \
+	(1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \
+	(1 << OTHER_NUMBER) | \
+	(1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \
+	(1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL))
 
 /*
- * Unicode characters less than this value are represented by themselves 
- * in UTF-8 strings. 
+ * Unicode characters less than this value are represented by themselves in
+ * UTF-8 strings.
  */
 
 #define UNICODE_SELF	0x80
 
 /*
- * The following structures are used when mapping between Unicode (UCS-2)
- * and UTF-8.
+ * The following structures are used when mapping between Unicode (UCS-2) and
+ * UTF-8.
  */
 
-static CONST unsigned char totalBytes[256] = {
+static const unsigned char totalBytes[256] = {
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -76,29 +73,13 @@ static CONST unsigned char totalBytes[256] = {
 #else
     1,1,1,1,1,1,1,1,
 #endif
-#if TCL_UTF_MAX > 4
-    5,5,5,5,
-#else
-    1,1,1,1,
-#endif
-#if TCL_UTF_MAX > 5
-    6,6,6,6
-#else
-    1,1,1,1
-#endif
+    1,1,1,1,1,1,1,1
 };
-
-/*
- * Procedures used only in this module.
- */
-
-static int UtfCount _ANSI_ARGS_((int ch));
-
 
 /*
  *---------------------------------------------------------------------------
  *
- * UtfCount --
+ * TclUtfCount --
  *
  *	Find the number of bytes in the Utf character "ch".
  *
@@ -110,30 +91,21 @@ static int UtfCount _ANSI_ARGS_((int ch));
  *
  *---------------------------------------------------------------------------
  */
- 
-INLINE static int
-UtfCount(ch)
-    int ch;			/* The Tcl_UniChar whose size is returned. */
+
+int
+TclUtfCount(
+    int ch)			/* The Tcl_UniChar whose size is returned. */
 {
-    if ((ch > 0) && (ch < UNICODE_SELF)) {
+    if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) {
 	return 1;
     }
     if (ch <= 0x7FF) {
 	return 2;
     }
-    if (ch <= 0xFFFF) {
-	return 3;
-    }
 #if TCL_UTF_MAX > 3
-    if (ch <= 0x1FFFFF) {
+    if (((unsigned)(ch - 0x10000) <= 0xfffff)) {
 	return 4;
     }
-    if (ch <= 0x3FFFFFF) {
-	return 5;
-    }
-    if (ch <= 0x7FFFFFFF) {
-	return 6;
-    }
 #endif
     return 3;
 }
@@ -144,73 +116,75 @@ UtfCount(ch)
  * Tcl_UniCharToUtf --
  *
  *	Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the
- *	provided buffer.  Equivalent to Plan 9 runetochar().
+ *	provided buffer. Equivalent to Plan 9 runetochar().
  *
  * Results:
- *	The return values is the number of bytes in the buffer that
- *	were consumed.  
+ *	The return value is the number of bytes in the buffer that were
+ *	consumed.
  *
  * Side effects:
  *	None.
  *
  *---------------------------------------------------------------------------
  */
- 
-INLINE int
-Tcl_UniCharToUtf(ch, str)
-    int ch;			/* The Tcl_UniChar to be stored in the
+
+int
+Tcl_UniCharToUtf(
+    int ch,			/* The Tcl_UniChar to be stored in the
 				 * buffer. */
-    char *str;			/* Buffer in which the UTF-8 representation
-				 * of the Tcl_UniChar is stored.  Buffer must
-				 * be large enough to hold the UTF-8 character
+    char *buf)			/* Buffer in which the UTF-8 representation of
+				 * the Tcl_UniChar is stored. Buffer must be
+				 * large enough to hold the UTF-8 character
 				 * (at most TCL_UTF_MAX bytes). */
 {
-    if ((ch > 0) && (ch < UNICODE_SELF)) {
-	str[0] = (char) ch;
+    if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) {
+	buf[0] = (char) ch;
 	return 1;
     }
-    if (ch <= 0x7FF) {
-	str[1] = (char) ((ch | 0x80) & 0xBF);
-	str[0] = (char) ((ch >> 6) | 0xC0);
-	return 2;
-    }
-    if (ch <= 0xFFFF) {
-	three:
-	str[2] = (char) ((ch | 0x80) & 0xBF);
-	str[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
-	str[0] = (char) ((ch >> 12) | 0xE0);
-	return 3;
-    }
+    if (ch >= 0) {
+	if (ch <= 0x7FF) {
+	    buf[1] = (char) ((ch | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 6) | 0xC0);
+	    return 2;
+	}
+	if (ch <= 0xFFFF) {
+#if TCL_UTF_MAX == 4
+	    if ((ch & 0xF800) == 0xD800) {
+		if (ch & 0x0400) {
+		    /* Low surrogate */
+		    buf[3] = (char) ((ch | 0x80) & 0xBF);
+		    buf[2] |= (char) (((ch >> 6) | 0x80) & 0x8F);
+		    return 4;
+		} else {
+		    /* High surrogate */
+		    ch += 0x40;
+		    buf[2] = (char) (((ch << 4) | 0x80) & 0xB0);
+		    buf[1] = (char) (((ch >> 2) | 0x80) & 0xBF);
+		    buf[0] = (char) (((ch >> 8) | 0xF0) & 0xF7);
+		    return 0;
+		}
+	    }
+#endif
+	    goto three;
+	}
 
 #if TCL_UTF_MAX > 3
-    if (ch <= 0x1FFFFF) {
-	str[3] = (char) ((ch | 0x80) & 0xBF);
-	str[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
-	str[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
-	str[0] = (char) ((ch >> 18) | 0xF0);
-	return 4;
-    }
-    if (ch <= 0x3FFFFFF) {
-	str[4] = (char) ((ch | 0x80) & 0xBF);
-	str[3] = (char) (((ch >> 6) | 0x80) & 0xBF);
-	str[2] = (char) (((ch >> 12) | 0x80) & 0xBF);
-	str[1] = (char) (((ch >> 18) | 0x80) & 0xBF);
-	str[0] = (char) ((ch >> 24) | 0xF8);
-	return 5;
-    }
-    if (ch <= 0x7FFFFFFF) {
-	str[5] = (char) ((ch | 0x80) & 0xBF);
-	str[4] = (char) (((ch >> 6) | 0x80) & 0xBF);
-	str[3] = (char) (((ch >> 12) | 0x80) & 0xBF);
-	str[2] = (char) (((ch >> 18) | 0x80) & 0xBF);
-	str[1] = (char) (((ch >> 24) | 0x80) & 0xBF);
-	str[0] = (char) ((ch >> 30) | 0xFC);
-	return 6;
-    }
+	if (ch <= 0x10FFFF) {
+	    buf[3] = (char) ((ch | 0x80) & 0xBF);
+	    buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
+	    buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 18) | 0xF0);
+	    return 4;
+	}
 #endif
+    }
 
     ch = 0xFFFD;
-    goto three;
+three:
+    buf[2] = (char) ((ch | 0x80) & 0xBF);
+    buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
+    buf[0] = (char) ((ch >> 12) | 0xE0);
+    return 3;
 }
 
 /*
@@ -222,25 +196,24 @@ Tcl_UniCharToUtf(ch, str)
  *
  * Results:
  *	The return value is a pointer to the UTF-8 representation of the
- *	Unicode string.  Storage for the return value is appended to the
- *	end of dsPtr.
+ *	Unicode string. Storage for the return value is appended to the end of
+ *	dsPtr.
  *
  * Side effects:
  *	None.
  *
  *---------------------------------------------------------------------------
  */
- 
+
 char *
-Tcl_UniCharToUtfDString(wString, numChars, dsPtr)
-    CONST Tcl_UniChar *wString;	/* Unicode string to convert to UTF-8. */
-    int numChars;		/* Length of Unicode string in Tcl_UniChars
+Tcl_UniCharToUtfDString(
+    const Tcl_UniChar *uniStr,	/* Unicode string to convert to UTF-8. */
+    int uniLength,		/* Length of Unicode string in Tcl_UniChars
 				 * (must be >= 0). */
-    Tcl_DString *dsPtr;		/* UTF-8 representation of string is
-				 * appended to this previously initialized
-				 * DString. */
+    Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
+				 * to this previously initialized DString. */
 {
-    CONST Tcl_UniChar *w, *wEnd;
+    const Tcl_UniChar *w, *wEnd;
     char *p, *string;
     int oldLength;
 
@@ -250,12 +223,12 @@ Tcl_UniCharToUtfDString(wString, numChars, dsPtr)
      */
 
     oldLength = Tcl_DStringLength(dsPtr);
-    Tcl_DStringSetLength(dsPtr, (oldLength + numChars + 1) * TCL_UTF_MAX);
+    Tcl_DStringSetLength(dsPtr, (oldLength + uniLength + 1) * TCL_UTF_MAX);
     string = Tcl_DStringValue(dsPtr) + oldLength;
 
     p = string;
-    wEnd = wString + numChars;
-    for (w = wString; w < wEnd; ) {
+    wEnd = uniStr + uniLength;
+    for (w = uniStr; w < wEnd; ) {
 	p += Tcl_UniCharToUtf(*w, p);
 	w++;
     }
@@ -269,16 +242,16 @@ Tcl_UniCharToUtfDString(wString, numChars, dsPtr)
  *
  * Tcl_UtfToUniChar --
  *
- *	Extract the Tcl_UniChar represented by the UTF-8 string.  Bad
- *	UTF-8 sequences are converted to valid Tcl_UniChars and processing
- *	continues.  Equivalent to Plan 9 chartorune().
+ *	Extract the Tcl_UniChar represented by the UTF-8 string. Bad UTF-8
+ *	sequences are converted to valid Tcl_UniChars and processing
+ *	continues. Equivalent to Plan 9 chartorune().
  *
- *	The caller must ensure that the source buffer is long enough that
- *	this routine does not run off the end and dereference non-existent
- *	memory looking for trail bytes.  If the source buffer is known to
- *	be '\0' terminated, this cannot happen.  Otherwise, the caller
- *	should call Tcl_UtfCharComplete() before calling this routine to
- *	ensure that enough bytes remain in the string.
+ *	The caller must ensure that the source buffer is long enough that this
+ *	routine does not run off the end and dereference non-existent memory
+ *	looking for trail bytes. If the source buffer is known to be '\0'
+ *	terminated, this cannot happen. Otherwise, the caller should call
+ *	Tcl_UtfCharComplete() before calling this routine to ensure that
+ *	enough bytes remain in the string.
  *
  * Results:
  *	*chPtr is filled with the Tcl_UniChar, and the return value is the
@@ -289,20 +262,20 @@ Tcl_UniCharToUtfDString(wString, numChars, dsPtr)
  *
  *---------------------------------------------------------------------------
  */
- 
+
 int
-Tcl_UtfToUniChar(str, chPtr)
-    register CONST char *str;	 /* The UTF-8 string. */
-    register Tcl_UniChar *chPtr; /* Filled with the Tcl_UniChar represented
-				  * by the UTF-8 string. */
+Tcl_UtfToUniChar(
+    register const char *src,	/* The UTF-8 string. */
+    register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
+				 * the UTF-8 string. */
 {
     register int byte;
-    
+
     /*
      * Unroll 1 to 3 byte UTF-8 sequences, use loop to handle longer ones.
      */
 
-    byte = *((unsigned char *) str);
+    byte = *((unsigned char *) src);
     if (byte < 0xC0) {
 	/*
 	 * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
@@ -313,60 +286,51 @@ Tcl_UtfToUniChar(str, chPtr)
 	*chPtr = (Tcl_UniChar) byte;
 	return 1;
     } else if (byte < 0xE0) {
-	if ((str[1] & 0xC0) == 0x80) {
+	if ((src[1] & 0xC0) == 0x80) {
 	    /*
 	     * Two-byte-character lead-byte followed by a trail-byte.
 	     */
 
-	    *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (str[1] & 0x3F));
+	    *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F));
 	    return 2;
 	}
+
 	/*
 	 * A two-byte-character lead-byte not followed by trail-byte
 	 * represents itself.
 	 */
-
-	*chPtr = (Tcl_UniChar) byte;
-	return 1;
     } else if (byte < 0xF0) {
-	if (((str[1] & 0xC0) == 0x80) && ((str[2] & 0xC0) == 0x80)) {
+	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
 	    /*
 	     * Three-byte-character lead byte followed by two trail bytes.
 	     */
 
-	    *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12) 
-		    | ((str[1] & 0x3F) << 6) | (str[2] & 0x3F));
+	    *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
+		    | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
 	    return 3;
 	}
+
 	/*
 	 * A three-byte-character lead-byte not followed by two trail-bytes
 	 * represents itself.
 	 */
-
-	*chPtr = (Tcl_UniChar) byte;
-	return 1;
     }
 #if TCL_UTF_MAX > 3
-    else {
-	int ch, total, trail;
-
-	total = totalBytes[byte];
-	trail = total - 1;
-	if (trail > 0) {
-	    ch = byte & (0x3F >> trail);
-	    do {
-		str++;
-		if ((*str & 0xC0) != 0x80) {
-		    *chPtr = byte;
-		    return 1;
-		}
-		ch <<= 6;
-		ch |= (*str & 0x3F);
-		trail--;
-	    } while (trail > 0);
-	    *chPtr = ch;
-	    return total;
+    else if (byte < 0xF8) {
+	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
+	    /*
+	     * Four-byte-character lead byte followed by three trail bytes.
+	     */
+
+	    *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
+		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
+	    return 4;
 	}
+
+	/*
+	 * A four-byte-character lead-byte not followed by three trail-bytes
+	 * represents itself.
+	 */
     }
 #endif
 
@@ -383,9 +347,8 @@ Tcl_UtfToUniChar(str, chPtr)
  *
  * Results:
  *	The return value is a pointer to the Unicode representation of the
- *	UTF-8 string.  Storage for the return value is appended to the
- *	end of dsPtr.  The Unicode string is terminated with a Unicode
- *	NULL character.
+ *	UTF-8 string. Storage for the return value is appended to the end of
+ *	dsPtr. The Unicode string is terminated with a Unicode NULL character.
  *
  * Side effects:
  *	None.
@@ -394,35 +357,36 @@ Tcl_UtfToUniChar(str, chPtr)
  */
 
 Tcl_UniChar *
-Tcl_UtfToUniCharDString(string, length, dsPtr)
-    CONST char *string;		/* UTF-8 string to convert to Unicode. */
-    int length;			/* Length of UTF-8 string in bytes, or -1
-				 * for strlen(). */
-    Tcl_DString *dsPtr;		/* Unicode representation of string is
+Tcl_UtfToUniCharDString(
+    const char *src,		/* UTF-8 string to convert to Unicode. */
+    int length,			/* Length of UTF-8 string in bytes, or -1 for
+				 * strlen(). */
+    Tcl_DString *dsPtr)		/* Unicode representation of string is
 				 * appended to this previously initialized
 				 * DString. */
 {
     Tcl_UniChar *w, *wString;
-    CONST char *p, *end;
+    const char *p, *end;
     int oldLength;
 
     if (length < 0) {
-	length = strlen(string);
+	length = strlen(src);
     }
 
     /*
-     * Unicode string length in Tcl_UniChars will be <= UTF-8 string length
-     * in bytes.
+     * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
+     * bytes.
      */
 
     oldLength = Tcl_DStringLength(dsPtr);
+/* TODO: fix overreach! */
     Tcl_DStringSetLength(dsPtr,
 	    (int) ((oldLength + length + 1) * sizeof(Tcl_UniChar)));
     wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);
 
     w = wString;
-    end = string + length;
-    for (p = string; p < end; ) {
+    end = src + length;
+    for (p = src; p < end; ) {
 	p += TclUtfToUniChar(p, w);
 	w++;
     }
@@ -438,9 +402,9 @@ Tcl_UtfToUniCharDString(string, length, dsPtr)
  *
  * Tcl_UtfCharComplete --
  *
- *	Determine if the UTF-8 string of the given length is long enough
- *	to be decoded by Tcl_UtfToUniChar().  This does not ensure that the
- *	UTF-8 string is properly formed.  Equivalent to Plan 9 fullrune().
+ *	Determine if the UTF-8 string of the given length is long enough to be
+ *	decoded by Tcl_UtfToUniChar(). This does not ensure that the UTF-8
+ *	string is properly formed. Equivalent to Plan 9 fullrune().
  *
  * Results:
  *	The return value is 0 if the string is not long enough, non-zero
@@ -453,15 +417,15 @@ Tcl_UtfToUniCharDString(string, length, dsPtr)
  */
 
 int
-Tcl_UtfCharComplete(str, len)
-    CONST char *str;		/* String to check if first few bytes
-				 * contain a complete UTF-8 character. */
-    int len;			/* Length of above string in bytes. */
+Tcl_UtfCharComplete(
+    const char *src,		/* String to check if first few bytes contain
+				 * a complete UTF-8 character. */
+    int length)			/* Length of above string in bytes. */
 {
     int ch;
 
-    ch = *((unsigned char *) str);
-    return len >= totalBytes[ch];
+    ch = *((unsigned char *) src);
+    return length >= totalBytes[ch];
 }
 
 /*
@@ -469,23 +433,23 @@ Tcl_UtfCharComplete(str, len)
  *
  * Tcl_NumUtfChars --
  *
- *	Returns the number of characters (not bytes) in the UTF-8 string,
- *	not including the terminating NULL byte.  This is equivalent to
- *	Plan 9 utflen() and utfnlen().
+ *	Returns the number of characters (not bytes) in the UTF-8 string, not
+ *	including the terminating NULL byte. This is equivalent to Plan 9
+ *	utflen() and utfnlen().
  *
  * Results:
- *	As above.  
+ *	As above.
  *
  * Side effects:
  *	None.
  *
  *---------------------------------------------------------------------------
  */
- 
-int 
-Tcl_NumUtfChars(str, len)
-    register CONST char *str;	/* The UTF-8 string to measure. */
-    int len;			/* The length of the string in bytes, or -1
+
+int
+Tcl_NumUtfChars(
+    register const char *src,	/* The UTF-8 string to measure. */
+    int length)			/* The length of the string in bytes, or -1
 				 * for strlen(string). */
 {
     Tcl_UniChar ch;
@@ -495,27 +459,27 @@ Tcl_NumUtfChars(str, len)
     /*
      * The separate implementations are faster.
      *
-     * Since this is a time-sensitive function, we also do the check for
-     * the single-byte char case specially.
+     * Since this is a time-sensitive function, we also do the check for the
+     * single-byte char case specially.
      */
 
     i = 0;
-    if (len < 0) {
-	while (*str != '\0') {
-	    str += TclUtfToUniChar(str, chPtr);
+    if (length < 0) {
+	while (*src != '\0') {
+	    src += TclUtfToUniChar(src, chPtr);
 	    i++;
 	}
     } else {
 	register int n;
 
-	while (len > 0) {
-	    if (UCHAR(*str) < 0xC0) {
-		len--;
-		str++;
+	while (length > 0) {
+	    if (UCHAR(*src) < 0xC0) {
+		length--;
+		src++;
 	    } else {
-		n = Tcl_UtfToUniChar(str, chPtr);
-		len -= n;
-		str += n;
+		n = Tcl_UtfToUniChar(src, chPtr);
+		length -= n;
+		src += n;
 	    }
 	    i++;
 	}
@@ -528,37 +492,37 @@ Tcl_NumUtfChars(str, len)
  *
  * Tcl_UtfFindFirst --
  *
- *	Returns a pointer to the first occurance of the given Tcl_UniChar
- *	in the NULL-terminated UTF-8 string.  The NULL terminator is
- *	considered part of the UTF-8 string.  Equivalent to Plan 9
- *	utfrune().
+ *	Returns a pointer to the first occurrence of the given Tcl_UniChar in
+ *	the NULL-terminated UTF-8 string. The NULL terminator is considered
+ *	part of the UTF-8 string. Equivalent to Plan 9 utfrune().
  *
  * Results:
- *	As above.  If the Tcl_UniChar does not exist in the given string,
- *	the return value is NULL.
+ *	As above. If the Tcl_UniChar does not exist in the given string, the
+ *	return value is NULL.
  *
  * Side effects:
  *	None.
  *
  *---------------------------------------------------------------------------
  */
-CONST char *
-Tcl_UtfFindFirst(string, ch)
-    CONST char *string;		/* The UTF-8 string to be searched. */
-    int ch;			/* The Tcl_UniChar to search for. */
+
+const char *
+Tcl_UtfFindFirst(
+    const char *src,		/* The UTF-8 string to be searched. */
+    int ch)			/* The Tcl_UniChar to search for. */
 {
     int len;
     Tcl_UniChar find;
-    
+
     while (1) {
-	len = TclUtfToUniChar(string, &find);
+	len = TclUtfToUniChar(src, &find);
 	if (find == ch) {
-	    return string;
+	    return src;
 	}
-	if (*string == '\0') {
+	if (*src == '\0') {
 	    return NULL;
 	}
-	string += len;
+	src += len;
     }
 }
 
@@ -567,14 +531,13 @@ Tcl_UtfFindFirst(string, ch)
  *
  * Tcl_UtfFindLast --
  *
- *	Returns a pointer to the last occurance of the given Tcl_UniChar
- *	in the NULL-terminated UTF-8 string.  The NULL terminator is
- *	considered part of the UTF-8 string.  Equivalent to Plan 9
- *	utfrrune().
+ *	Returns a pointer to the last occurrence of the given Tcl_UniChar in
+ *	the NULL-terminated UTF-8 string. The NULL terminator is considered
+ *	part of the UTF-8 string. Equivalent to Plan 9 utfrrune().
  *
  * Results:
- *	As above.  If the Tcl_UniChar does not exist in the given string,
- *	the return value is NULL.
+ *	As above. If the Tcl_UniChar does not exist in the given string, the
+ *	return value is NULL.
  *
  * Side effects:
  *	None.
@@ -582,25 +545,25 @@ Tcl_UtfFindFirst(string, ch)
  *---------------------------------------------------------------------------
  */
 
-CONST char *
-Tcl_UtfFindLast(string, ch)
-    CONST char *string;		/* The UTF-8 string to be searched. */
-    int ch;			/* The Tcl_UniChar to search for. */
+const char *
+Tcl_UtfFindLast(
+    const char *src,		/* The UTF-8 string to be searched. */
+    int ch)			/* The Tcl_UniChar to search for. */
 {
     int len;
     Tcl_UniChar find;
-    CONST char *last;
-	
+    const char *last;
+
     last = NULL;
     while (1) {
-	len = TclUtfToUniChar(string, &find);
+	len = TclUtfToUniChar(src, &find);
 	if (find == ch) {
-	    last = string;
+	    last = src;
 	}
-	if (*string == '\0') {
+	if (*src == '\0') {
 	    break;
 	}
-	string += len;
+	src += len;
     }
     return last;
 }
@@ -610,28 +573,27 @@ Tcl_UtfFindLast(string, ch)
  *
  * Tcl_UtfNext --
  *
- *	Given a pointer to some current location in a UTF-8 string,
- *	move forward one character.  The caller must ensure that they
- *	are not asking for the next character after the last character
- *	in the string.
+ *	Given a pointer to some current location in a UTF-8 string, move
+ *	forward one character. The caller must ensure that they are not asking
+ *	for the next character after the last character in the string.
  *
  * Results:
- *	The return value is the pointer to the next character in
- *	the UTF-8 string.
+ *	The return value is the pointer to the next character in the UTF-8
+ *	string.
  *
  * Side effects:
  *	None.
  *
  *---------------------------------------------------------------------------
  */
- 
-CONST char *
-Tcl_UtfNext(str) 
-    CONST char *str;		    /* The current location in the string. */
+
+const char *
+Tcl_UtfNext(
+    const char *src)		/* The current location in the string. */
 {
     Tcl_UniChar ch;
 
-    return str + TclUtfToUniChar(str, &ch);
+    return src + TclUtfToUniChar(src, &ch);
 }
 
 /*
@@ -639,15 +601,15 @@ Tcl_UtfNext(str)
  *
  * Tcl_UtfPrev --
  *
- *	Given a pointer to some current location in a UTF-8 string,
- *	move backwards one character.  This works correctly when the
- *	pointer is in the middle of a UTF-8 character.
+ *	Given a pointer to some current location in a UTF-8 string, move
+ *	backwards one character. This works correctly when the pointer is in
+ *	the middle of a UTF-8 character.
  *
  * Results:
- *	The return value is a pointer to the previous character in the
- *	UTF-8 string.  If the current location was already at the
- *	beginning of the string, the return value will also be a
- *	pointer to the beginning of the string.
+ *	The return value is a pointer to the previous character in the UTF-8
+ *	string. If the current location was already at the beginning of the
+ *	string, the return value will also be a pointer to the beginning of
+ *	the string.
  *
  * Side effects:
  *	None.
@@ -655,22 +617,21 @@ Tcl_UtfNext(str)
  *---------------------------------------------------------------------------
  */
 
-CONST char *
-Tcl_UtfPrev(str, start)
-    CONST char *str;		    /* The current location in the string. */
-    CONST char *start;		    /* Pointer to the beginning of the
-				     * string, to avoid going backwards too
-				     * far. */
+const char *
+Tcl_UtfPrev(
+    const char *src,		/* The current location in the string. */
+    const char *start)		/* Pointer to the beginning of the string, to
+				 * avoid going backwards too far. */
 {
-    CONST char *look;
+    const char *look;
     int i, byte;
-    
-    str--;
-    look = str;
+
+    src--;
+    look = src;
     for (i = 0; i < TCL_UTF_MAX; i++) {
 	if (look < start) {
-	    if (str < start) {
-		str = start;
+	    if (src < start) {
+		src = start;
 	    }
 	    break;
 	}
@@ -683,16 +644,16 @@ Tcl_UtfPrev(str, start)
 	}
 	look--;
     }
-    return str;
+    return src;
 }
-	
+
 /*
  *---------------------------------------------------------------------------
  *
  * Tcl_UniCharAtIndex --
  *
- *	Returns the Unicode character represented at the specified
- *	character (not byte) position in the UTF-8 string.
+ *	Returns the Unicode character represented at the specified character
+ *	(not byte) position in the UTF-8 string.
  *
  * Results:
  *	As above.
@@ -702,13 +663,13 @@ Tcl_UtfPrev(str, start)
  *
  *---------------------------------------------------------------------------
  */
- 
+
 Tcl_UniChar
-Tcl_UniCharAtIndex(src, index)
-    register CONST char *src;	/* The UTF-8 string to dereference. */
-    register int index;		/* The position of the desired character. */
+Tcl_UniCharAtIndex(
+    register const char *src,	/* The UTF-8 string to dereference. */
+    register int index)		/* The position of the desired character. */
 {
-    Tcl_UniChar ch;
+    Tcl_UniChar ch = 0;
 
     while (index >= 0) {
 	index--;
@@ -722,8 +683,8 @@ Tcl_UniCharAtIndex(src, index)
  *
  * Tcl_UtfAtIndex --
  *
- *	Returns a pointer to the specified character (not byte) position
- *	in the UTF-8 string.
+ *	Returns a pointer to the specified character (not byte) position in
+ *	the UTF-8 string.
  *
  * Results:
  *	As above.
@@ -734,13 +695,13 @@ Tcl_UniCharAtIndex(src, index)
  *---------------------------------------------------------------------------
  */
 
-CONST char *
-Tcl_UtfAtIndex(src, index)
-    register CONST char *src;	/* The UTF-8 string. */
-    register int index;		/* The position of the desired character. */
+const char *
+Tcl_UtfAtIndex(
+    register const char *src,	/* The UTF-8 string. */
+    register int index)		/* The position of the desired character. */
 {
     Tcl_UniChar ch;
-    
+
     while (index > 0) {
 	index--;
 	src += TclUtfToUniChar(src, &ch);
@@ -757,31 +718,30 @@ Tcl_UtfAtIndex(src, index)
  *
  * Results:
  *	Stores the bytes represented by the backslash sequence in dst and
- *	returns the number of bytes written to dst.  At most TCL_UTF_MAX
- *	bytes are written to dst; dst must have been large enough to accept
- *	those bytes.  If readPtr isn't NULL then it is filled in with a
- *	count of the number of bytes in the backslash sequence.  
+ *	returns the number of bytes written to dst. At most TCL_UTF_MAX bytes
+ *	are written to dst; dst must have been large enough to accept those
+ *	bytes. If readPtr isn't NULL then it is filled in with a count of the
+ *	number of bytes in the backslash sequence.
  *
  * Side effects:
- *	The maximum number of bytes it takes to represent a Unicode
- *	character in UTF-8 is guaranteed to be less than the number of
- *	bytes used to express the backslash sequence that represents
- *	that Unicode character.  If the target buffer into which the
- *	caller is going to store the bytes that represent the Unicode
- *	character is at least as large as the source buffer from which
- *	the backslashed sequence was extracted, no buffer overruns should
- *	occur.
+ *	The maximum number of bytes it takes to represent a Unicode character
+ *	in UTF-8 is guaranteed to be less than the number of bytes used to
+ *	express the backslash sequence that represents that Unicode character.
+ *	If the target buffer into which the caller is going to store the bytes
+ *	that represent the Unicode character is at least as large as the
+ *	source buffer from which the backslashed sequence was extracted, no
+ *	buffer overruns should occur.
  *
  *---------------------------------------------------------------------------
  */
 
 int
-Tcl_UtfBackslash(src, readPtr, dst)
-    CONST char *src;		/* Points to the backslash character of
-				 * a backslash sequence. */
-    int *readPtr;		/* Fill in with number of characters read
-				 * from src, unless NULL. */
-    char *dst;			/* Filled with the bytes represented by the
+Tcl_UtfBackslash(
+    const char *src,		/* Points to the backslash character of a
+				 * backslash sequence. */
+    int *readPtr,		/* Fill in with number of characters read from
+				 * src, unless NULL. */
+    char *dst)			/* Filled with the bytes represented by the
 				 * backslash sequence. */
 {
 #define LINE_LENGTH 128
@@ -790,7 +750,10 @@ Tcl_UtfBackslash(src, readPtr, dst)
 
     result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst);
     if (numRead == LINE_LENGTH) {
-	/* We ate a whole line.  Pay the price of a strlen() */
+	/*
+	 * We ate a whole line. Pay the price of a strlen()
+	 */
+
 	result = TclParseBackslash(src, (int)strlen(src), &numRead, dst);
     }
     if (readPtr != NULL) {
@@ -804,12 +767,12 @@ Tcl_UtfBackslash(src, readPtr, dst)
  *
  * Tcl_UtfToUpper --
  *
- *	Convert lowercase characters to uppercase characters in a UTF
- *	string in place.  The conversion may shrink the UTF string.
+ *	Convert lowercase characters to uppercase characters in a UTF string
+ *	in place. The conversion may shrink the UTF string.
  *
  * Results:
- *	Returns the number of bytes in the resulting string
- *	excluding the trailing null.
+ *	Returns the number of bytes in the resulting string excluding the
+ *	trailing null.
  *
  * Side effects:
  *	Writes a terminating null after the last converted character.
@@ -818,8 +781,8 @@ Tcl_UtfBackslash(src, readPtr, dst)
  */
 
 int
-Tcl_UtfToUpper(str)
-    char *str;			/* String to convert in place. */
+Tcl_UtfToUpper(
+    char *str)			/* String to convert in place. */
 {
     Tcl_UniChar ch, upChar;
     char *src, *dst;
@@ -831,16 +794,16 @@ Tcl_UtfToUpper(str)
 
     src = dst = str;
     while (*src) {
-        bytes = TclUtfToUniChar(src, &ch);
+	bytes = TclUtfToUniChar(src, &ch);
 	upChar = Tcl_UniCharToUpper(ch);
 
 	/*
-	 * To keep badly formed Utf strings from getting inflated by
-	 * the conversion (thereby causing a segfault), only copy the
-	 * upper case char to dst if its size is <= the original char.
+	 * To keep badly formed Utf strings from getting inflated by the
+	 * conversion (thereby causing a segfault), only copy the upper case
+	 * char to dst if its size is <= the original char.
 	 */
-	
-	if (bytes < UtfCount(upChar)) {
+
+	if (bytes < TclUtfCount(upChar)) {
 	    memcpy(dst, src, (size_t) bytes);
 	    dst += bytes;
 	} else {
@@ -857,12 +820,12 @@ Tcl_UtfToUpper(str)
  *
  * Tcl_UtfToLower --
  *
- *	Convert uppercase characters to lowercase characters in a UTF
- *	string in place.  The conversion may shrink the UTF string.
+ *	Convert uppercase characters to lowercase characters in a UTF string
+ *	in place. The conversion may shrink the UTF string.
  *
  * Results:
- *	Returns the number of bytes in the resulting string
- *	excluding the trailing null.
+ *	Returns the number of bytes in the resulting string excluding the
+ *	trailing null.
  *
  * Side effects:
  *	Writes a terminating null after the last converted character.
@@ -871,13 +834,13 @@ Tcl_UtfToUpper(str)
  */
 
 int
-Tcl_UtfToLower(str)
-    char *str;			/* String to convert in place. */
+Tcl_UtfToLower(
+    char *str)			/* String to convert in place. */
 {
     Tcl_UniChar ch, lowChar;
     char *src, *dst;
     int bytes;
-    
+
     /*
      * Iterate over the string until we hit the terminating null.
      */
@@ -888,12 +851,12 @@ Tcl_UtfToLower(str)
 	lowChar = Tcl_UniCharToLower(ch);
 
 	/*
-	 * To keep badly formed Utf strings from getting inflated by
-	 * the conversion (thereby causing a segfault), only copy the
-	 * lower case char to dst if its size is <= the original char.
+	 * To keep badly formed Utf strings from getting inflated by the
+	 * conversion (thereby causing a segfault), only copy the lower case
+	 * char to dst if its size is <= the original char.
 	 */
-	
-	if (bytes < UtfCount(lowChar)) {
+
+	if (bytes < TclUtfCount(lowChar)) {
 	    memcpy(dst, src, (size_t) bytes);
 	    dst += bytes;
 	} else {
@@ -910,13 +873,13 @@ Tcl_UtfToLower(str)
  *
  * Tcl_UtfToTitle --
  *
- *	Changes the first character of a UTF string to title case or
- *	uppercase and the rest of the string to lowercase.  The
- *	conversion happens in place and may shrink the UTF string.
+ *	Changes the first character of a UTF string to title case or uppercase
+ *	and the rest of the string to lowercase. The conversion happens in
+ *	place and may shrink the UTF string.
  *
  * Results:
- *	Returns the number of bytes in the resulting string
- *	excluding the trailing null.
+ *	Returns the number of bytes in the resulting string excluding the
+ *	trailing null.
  *
  * Side effects:
  *	Writes a terminating null after the last converted character.
@@ -925,13 +888,13 @@ Tcl_UtfToLower(str)
  */
 
 int
-Tcl_UtfToTitle(str)
-    char *str;			/* String to convert in place. */
+Tcl_UtfToTitle(
+    char *str)			/* String to convert in place. */
 {
     Tcl_UniChar ch, titleChar, lowChar;
     char *src, *dst;
     int bytes;
-    
+
     /*
      * Capitalize the first character and then lowercase the rest of the
      * characters until we get to a null.
@@ -943,7 +906,7 @@ Tcl_UtfToTitle(str)
 	bytes = TclUtfToUniChar(src, &ch);
 	titleChar = Tcl_UniCharToTitle(ch);
 
-	if (bytes < UtfCount(titleChar)) {
+	if (bytes < TclUtfCount(titleChar)) {
 	    memcpy(dst, src, (size_t) bytes);
 	    dst += bytes;
 	} else {
@@ -955,7 +918,7 @@ Tcl_UtfToTitle(str)
 	bytes = TclUtfToUniChar(src, &ch);
 	lowChar = Tcl_UniCharToLower(ch);
 
-	if (bytes < UtfCount(lowChar)) {
+	if (bytes < TclUtfCount(lowChar)) {
 	    memcpy(dst, src, (size_t) bytes);
 	    dst += bytes;
 	} else {
@@ -972,8 +935,8 @@ Tcl_UtfToTitle(str)
  *
  * TclpUtfNcmp2 --
  *
- *	Compare at most n bytes of utf-8 strings cs and ct.  Both cs
- *	and ct are assumed to be at least n bytes long.
+ *	Compare at most numBytes bytes of utf-8 strings cs and ct. Both cs and
+ *	ct are assumed to be at least numBytes bytes long.
  *
  * Results:
  *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
@@ -985,26 +948,28 @@ Tcl_UtfToTitle(str)
  */
 
 int
-TclpUtfNcmp2(cs, ct, n)
-    CONST char *cs;		/* UTF string to compare to ct. */
-    CONST char *ct;		/* UTF string cs is compared to. */
-    unsigned long n;		/* Number of *bytes* to compare. */
+TclpUtfNcmp2(
+    const char *cs,		/* UTF string to compare to ct. */
+    const char *ct,		/* UTF string cs is compared to. */
+    unsigned long numBytes)	/* Number of *bytes* to compare. */
 {
     /*
-     * We can't simply call 'memcmp(cs, ct, n);' because we need to check
-     * for Tcl's \xC0\x80 non-utf-8 null encoding.
-     * Otherwise utf-8 lexes fine in the strcmp manner.
+     * We can't simply call 'memcmp(cs, ct, numBytes);' because we need to
+     * check for Tcl's \xC0\x80 non-utf-8 null encoding. Otherwise utf-8 lexes
+     * fine in the strcmp manner.
      */
+
     register int result = 0;
 
-    for ( ; n != 0; n--, cs++, ct++) {
+    for ( ; numBytes != 0; numBytes--, cs++, ct++) {
 	if (*cs != *ct) {
 	    result = UCHAR(*cs) - UCHAR(*ct);
 	    break;
 	}
     }
-    if (n && ((UCHAR(*cs) == 0xC0) || (UCHAR(*ct) == 0xC0))) {
+    if (numBytes && ((UCHAR(*cs) == 0xC0) || (UCHAR(*ct) == 0xC0))) {
 	unsigned char c1, c2;
+
 	c1 = ((UCHAR(*cs) == 0xC0) && (UCHAR(cs[1]) == 0x80)) ? 0 : UCHAR(*cs);
 	c2 = ((UCHAR(*ct) == 0xC0) && (UCHAR(ct[1]) == 0x80)) ? 0 : UCHAR(*ct);
 	result = (c1 - c2);
@@ -1017,8 +982,8 @@ TclpUtfNcmp2(cs, ct, n)
  *
  * Tcl_UtfNcmp --
  *
- *	Compare at most n UTF chars of string cs to string ct.  Both cs
- *	and ct are assumed to be at least n UTF chars long.
+ *	Compare at most numChars UTF chars of string cs to string ct. Both cs
+ *	and ct are assumed to be at least numChars UTF chars long.
  *
  * Results:
  *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
@@ -1030,23 +995,26 @@ TclpUtfNcmp2(cs, ct, n)
  */
 
 int
-Tcl_UtfNcmp(cs, ct, n)
-    CONST char *cs;		/* UTF string to compare to ct. */
-    CONST char *ct;		/* UTF string cs is compared to. */
-    unsigned long n;		/* Number of UTF chars to compare. */
+Tcl_UtfNcmp(
+    const char *cs,		/* UTF string to compare to ct. */
+    const char *ct,		/* UTF string cs is compared to. */
+    unsigned long numChars)	/* Number of UTF chars to compare. */
 {
     Tcl_UniChar ch1, ch2;
+
     /*
-     * Cannot use 'memcmp(cs, ct, n);' as byte representation of
-     * \u0000 (the pair of bytes 0xc0,0x80) is larger than byte
-     * representation of \u0001 (the byte 0x01.)
+     * Cannot use memcmp() here, as the byte representation of \u0000 (the
+     * pair of bytes 0xc0,0x80) is larger than the byte representation of
+     * \u0001 (the byte 0x01).
      */
-    while (n-- > 0) {
+
+    while (numChars-- > 0) {
 	/*
-	 * n must be interpreted as chars, not bytes.
-	 * This should be called only when both strings are of
-	 * at least n chars long (no need for \0 check)
+	 * numChars must be interpreted as chars, not bytes. This should be
+	 * called only when both strings are at least numChars chars long (no
+	 * need for a \0 check).
 	 */
+
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
 	if (ch1 != ch2) {
@@ -1061,9 +1029,9 @@ Tcl_UtfNcmp(cs, ct, n)
  *
  * Tcl_UtfNcasecmp --
  *
- *	Compare at most n UTF chars of string cs to string ct case
- *	insensitive.  Both cs and ct are assumed to be at least n
- *	UTF chars long.
+ *	Compare at most numChars UTF chars of string cs to string ct case
+ *	insensitive. Both cs and ct are assumed to be at least numChars UTF
+ *	chars long.
  *
  * Results:
  *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
@@ -1075,13 +1043,13 @@ Tcl_UtfNcmp(cs, ct, n)
  */
 
 int
-Tcl_UtfNcasecmp(cs, ct, n)
-    CONST char *cs;		/* UTF string to compare to ct. */
-    CONST char *ct;		/* UTF string cs is compared to. */
-    unsigned long n;			/* Number of UTF chars to compare. */
+Tcl_UtfNcasecmp(
+    const char *cs,		/* UTF string to compare to ct. */
+    const char *ct,		/* UTF string cs is compared to. */
+    unsigned long numChars)	/* Number of UTF chars to compare. */
 {
     Tcl_UniChar ch1, ch2;
-    while (n-- > 0) {
+    while (numChars-- > 0) {
 	/*
 	 * n must be interpreted as chars, not bytes.
 	 * This should be called only when both strings are of
@@ -1103,6 +1071,46 @@ Tcl_UtfNcasecmp(cs, ct, n)
 /*
  *----------------------------------------------------------------------
  *
+ * TclUtfCasecmp --
+ *
+ *	Compare UTF chars of string cs to string ct case insensitively.
+ *	Replacement for strcasecmp in Tcl core, in places where UTF-8 should
+ *	be handled.
+ *
+ * Results:
+ *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclUtfCasecmp(
+    const char *cs,		/* UTF string to compare to ct. */
+    const char *ct)		/* UTF string cs is compared to. */
+{
+    while (*cs && *ct) {
+	Tcl_UniChar ch1, ch2;
+
+	cs += TclUtfToUniChar(cs, &ch1);
+	ct += TclUtfToUniChar(ct, &ch2);
+	if (ch1 != ch2) {
+	    ch1 = Tcl_UniCharToLower(ch1);
+	    ch2 = Tcl_UniCharToLower(ch2);
+	    if (ch1 != ch2) {
+		return ch1 - ch2;
+	    }
+	}
+    }
+    return UCHAR(*cs) - UCHAR(*ct);
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_UniCharToUpper --
  *
  *	Compute the uppercase equivalent of the given Unicode character.
@@ -1117,16 +1125,15 @@ Tcl_UtfNcasecmp(cs, ct, n)
  */
 
 Tcl_UniChar
-Tcl_UniCharToUpper(ch)
-    int ch;			/* Unicode character to convert. */
+Tcl_UniCharToUpper(
+    int ch)			/* Unicode character to convert. */
 {
     int info = GetUniCharInfo(ch);
 
     if (GetCaseType(info) & 0x04) {
-	return (Tcl_UniChar) (ch - GetDelta(info));
-    } else {
-	return ch;
+	ch -= GetDelta(info);
     }
+    return (Tcl_UniChar) ch;
 }
 
 /*
@@ -1146,16 +1153,15 @@ Tcl_UniCharToUpper(ch)
  */
 
 Tcl_UniChar
-Tcl_UniCharToLower(ch)
-    int ch;			/* Unicode character to convert. */
+Tcl_UniCharToLower(
+    int ch)			/* Unicode character to convert. */
 {
     int info = GetUniCharInfo(ch);
 
     if (GetCaseType(info) & 0x02) {
-	return (Tcl_UniChar) (ch + GetDelta(info));
-    } else {
-	return ch;
+	ch += GetDelta(info);
     }
+    return (Tcl_UniChar) ch;
 }
 
 /*
@@ -1175,8 +1181,8 @@ Tcl_UniCharToLower(ch)
  */
 
 Tcl_UniChar
-Tcl_UniCharToTitle(ch)
-    int ch;			/* Unicode character to convert. */
+Tcl_UniCharToTitle(
+    int ch)			/* Unicode character to convert. */
 {
     int info = GetUniCharInfo(ch);
     int mode = GetCaseType(info);
@@ -1186,12 +1192,11 @@ Tcl_UniCharToTitle(ch)
 	 * Subtract or add one depending on the original case.
 	 */
 
-	return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1));
+	ch += ((mode & 0x4) ? -1 : 1);
     } else if (mode == 0x4) {
-	return (Tcl_UniChar) (ch - GetDelta(info));
-    } else {
-	return ch;
+	ch -= GetDelta(info);
     }
+    return (Tcl_UniChar) ch;
 }
 
 /*
@@ -1199,7 +1204,7 @@ Tcl_UniCharToTitle(ch)
  *
  * Tcl_UniCharLen --
  *
- *	Find the length of a UniChar string.  The str input must be null
+ *	Find the length of a UniChar string. The str input must be null
  *	terminated.
  *
  * Results:
@@ -1212,14 +1217,14 @@ Tcl_UniCharToTitle(ch)
  */
 
 int
-Tcl_UniCharLen(str)
-    CONST Tcl_UniChar *str;	/* Unicode string to find length of. */
+Tcl_UniCharLen(
+    const Tcl_UniChar *uniStr)	/* Unicode string to find length of. */
 {
     int len = 0;
-    
-    while (*str != '\0') {
+
+    while (*uniStr != '\0') {
 	len++;
-	str++;
+	uniStr++;
     }
     return len;
 }
@@ -1229,11 +1234,11 @@ Tcl_UniCharLen(str)
  *
  * Tcl_UniCharNcmp --
  *
- *	Compare at most n unichars of string cs to string ct.  Both cs
- *	and ct are assumed to be at least n unichars long.
+ *	Compare at most numChars unichars of string ucs to string uct.
+ *	Both ucs and uct are assumed to be at least numChars unichars long.
  *
  * Results:
- *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *	Return <0 if ucs < uct, 0 if ucs == uct, or >0 if ucs > uct.
  *
  * Side effects:
  *	None.
@@ -1242,24 +1247,26 @@ Tcl_UniCharLen(str)
  */
 
 int
-Tcl_UniCharNcmp(cs, ct, n)
-    CONST Tcl_UniChar *cs;		/* Unicode string to compare to ct. */
-    CONST Tcl_UniChar *ct;		/* Unicode string cs is compared to. */
-    unsigned long n;			/* Number of unichars to compare. */
+Tcl_UniCharNcmp(
+    const Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
+    const Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
+    unsigned long numChars)	/* Number of unichars to compare. */
 {
 #ifdef WORDS_BIGENDIAN
     /*
      * We are definitely on a big-endian machine; memcmp() is safe
      */
-    return memcmp(cs, ct, n*sizeof(Tcl_UniChar));
+
+    return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar));
 
 #else /* !WORDS_BIGENDIAN */
     /*
      * We can't simply call memcmp() because that is not lexically correct.
      */
-    for ( ; n != 0; cs++, ct++, n--) {
-	if (*cs != *ct) {
-	    return (*cs - *ct);
+
+    for ( ; numChars != 0; ucs++, uct++, numChars--) {
+	if (*ucs != *uct) {
+	    return (*ucs - *uct);
 	}
     }
     return 0;
@@ -1271,12 +1278,12 @@ Tcl_UniCharNcmp(cs, ct, n)
  *
  * Tcl_UniCharNcasecmp --
  *
- *	Compare at most n unichars of string cs to string ct case
- *	insensitive.  Both cs and ct are assumed to be at least n
+ *	Compare at most numChars unichars of string ucs to string uct case
+ *	insensitive. Both ucs and uct are assumed to be at least numChars
  *	unichars long.
  *
  * Results:
- *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *	Return <0 if ucs < uct, 0 if ucs == uct, or >0 if ucs > uct.
  *
  * Side effects:
  *	None.
@@ -1285,15 +1292,16 @@ Tcl_UniCharNcmp(cs, ct, n)
  */
 
 int
-Tcl_UniCharNcasecmp(cs, ct, n)
-    CONST Tcl_UniChar *cs;		/* Unicode string to compare to ct. */
-    CONST Tcl_UniChar *ct;		/* Unicode string cs is compared to. */
-    unsigned long n;			/* Number of unichars to compare. */
+Tcl_UniCharNcasecmp(
+    const Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
+    const Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
+    unsigned long numChars)	/* Number of unichars to compare. */
 {
-    for ( ; n != 0; n--, cs++, ct++) {
-	if (*cs != *ct) {
-	    Tcl_UniChar lcs = Tcl_UniCharToLower(*cs);
-	    Tcl_UniChar lct = Tcl_UniCharToLower(*ct);
+    for ( ; numChars != 0; numChars--, ucs++, uct++) {
+	if (*ucs != *uct) {
+	    Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs);
+	    Tcl_UniChar lct = Tcl_UniCharToLower(*uct);
+
 	    if (lcs != lct) {
 		return (lcs - lct);
 	    }
@@ -1319,12 +1327,15 @@ Tcl_UniCharNcasecmp(cs, ct, n)
  */
 
 int
-Tcl_UniCharIsAlnum(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsAlnum(
+    int ch)			/* Unicode character to test. */
 {
-    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
-
-    return (((ALPHA_BITS | DIGIT_BITS) >> category) & 1);
+#if TCL_UTF_MAX > 3
+    if (UNICODE_OUT_OF_RANGE(ch)) {
+	return 0;
+    }
+#endif
+    return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1);
 }
 
 /*
@@ -1344,11 +1355,15 @@ Tcl_UniCharIsAlnum(ch)
  */
 
 int
-Tcl_UniCharIsAlpha(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsAlpha(
+    int ch)			/* Unicode character to test. */
 {
-    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
-    return ((ALPHA_BITS >> category) & 1);
+#if TCL_UTF_MAX > 3
+    if (UNICODE_OUT_OF_RANGE(ch)) {
+	return 0;
+    }
+#endif
+    return ((ALPHA_BITS >> GetCategory(ch)) & 1);
 }
 
 /*
@@ -1368,10 +1383,22 @@ Tcl_UniCharIsAlpha(ch)
  */
 
 int
-Tcl_UniCharIsControl(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsControl(
+    int ch)			/* Unicode character to test. */
 {
-    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL);
+#if TCL_UTF_MAX > 3
+    if (UNICODE_OUT_OF_RANGE(ch)) {
+	ch &= 0x1fffff;
+	if ((ch == 0xe0001) || ((ch >= 0xe0020) && (ch <= 0xe007f))) {
+	    return 1;
+	}
+	if ((ch >= 0xf0000) && ((ch & 0xffff) <= 0xfffd)) {
+	    return 1;
+	}
+	return 0;
+    }
+#endif
+    return ((CONTROL_BITS >> GetCategory(ch)) & 1);
 }
 
 /*
@@ -1391,11 +1418,15 @@ Tcl_UniCharIsControl(ch)
  */
 
 int
-Tcl_UniCharIsDigit(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsDigit(
+    int ch)			/* Unicode character to test. */
 {
-    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK)
-	    == DECIMAL_DIGIT_NUMBER);
+#if TCL_UTF_MAX > 3
+    if (UNICODE_OUT_OF_RANGE(ch)) {
+	return 0;
+    }
+#endif
+    return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER);
 }
 
 /*
@@ -1415,11 +1446,16 @@ Tcl_UniCharIsDigit(ch)
  */
 
 int
-Tcl_UniCharIsGraph(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsGraph(
+    int ch)			/* Unicode character to test. */
 {
-    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
-    return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' '));
+#if TCL_UTF_MAX > 3
+    if (UNICODE_OUT_OF_RANGE(ch)) {
+	ch &= 0x1fffff;
+	return (ch >= 0xe0100) && (ch <= 0xe01ef);
+    }
+#endif
+    return ((GRAPH_BITS >> GetCategory(ch)) & 1);
 }
 
 /*
@@ -1439,10 +1475,15 @@ Tcl_UniCharIsGraph(ch)
  */
 
 int
-Tcl_UniCharIsLower(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsLower(
+    int ch)			/* Unicode character to test. */
 {
-    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == LOWERCASE_LETTER);
+#if TCL_UTF_MAX > 3
+    if (UNICODE_OUT_OF_RANGE(ch)) {
+	return 0;
+    }
+#endif
+    return (GetCategory(ch) == LOWERCASE_LETTER);
 }
 
 /*
@@ -1462,11 +1503,16 @@ Tcl_UniCharIsLower(ch)
  */
 
 int
-Tcl_UniCharIsPrint(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsPrint(
+    int ch)			/* Unicode character to test. */
 {
-    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
-    return ((PRINT_BITS >> category) & 1);
+#if TCL_UTF_MAX > 3
+    if (UNICODE_OUT_OF_RANGE(ch)) {
+	ch &= 0x1fffff;
+	return (ch >= 0xe0100) && (ch <= 0xe01ef);
+    }
+#endif
+    return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1);
 }
 
 /*
@@ -1486,11 +1532,15 @@ Tcl_UniCharIsPrint(ch)
  */
 
 int
-Tcl_UniCharIsPunct(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsPunct(
+    int ch)			/* Unicode character to test. */
 {
-    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
-    return ((PUNCT_BITS >> category) & 1);
+#if TCL_UTF_MAX > 3
+    if (UNICODE_OUT_OF_RANGE(ch)) {
+	return 0;
+    }
+#endif
+    return ((PUNCT_BITS >> GetCategory(ch)) & 1);
 }
 
 /*
@@ -1510,10 +1560,16 @@ Tcl_UniCharIsPunct(ch)
  */
 
 int
-Tcl_UniCharIsSpace(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsSpace(
+    int ch)			/* Unicode character to test. */
 {
-    register int category;
+#if TCL_UTF_MAX > 3
+    /* Ignore upper 11 bits. */
+    ch &= 0x1fffff;
+#else
+    /* Ignore upper 16 bits. */
+    ch &= 0xffff;
+#endif
 
     /*
      * If the character is within the first 127 characters, just use the
@@ -1521,10 +1577,16 @@ Tcl_UniCharIsSpace(ch)
      */
 
     if (ch < 0x80) {
-	return isspace(UCHAR(ch)); /* INTL: ISO space */
+	return TclIsSpaceProc((char) ch);
+#if TCL_UTF_MAX > 3
+    } else if (UNICODE_OUT_OF_RANGE(ch)) {
+	return 0;
+#endif
+    } else if (ch == 0x0085 || ch == 0x180e || ch == 0x200b
+	    || ch == 0x202f || ch == 0x2060 || ch == 0xfeff) {
+	return 1;
     } else {
-	category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
-	return ((SPACE_BITS >> category) & 1);
+	return ((SPACE_BITS >> GetCategory(ch)) & 1);
     }
 }
 
@@ -1545,10 +1607,15 @@ Tcl_UniCharIsSpace(ch)
  */
 
 int
-Tcl_UniCharIsUpper(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsUpper(
+    int ch)			/* Unicode character to test. */
 {
-    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == UPPERCASE_LETTER);
+#if TCL_UTF_MAX > 3
+    if (UNICODE_OUT_OF_RANGE(ch)) {
+	return 0;
+    }
+#endif
+    return (GetCategory(ch) == UPPERCASE_LETTER);
 }
 
 /*
@@ -1556,8 +1623,7 @@ Tcl_UniCharIsUpper(ch)
  *
  * Tcl_UniCharIsWordChar --
  *
- *	Test if a character is alphanumeric or a connector punctuation
- *	mark.
+ *	Test if a character is alphanumeric or a connector punctuation mark.
  *
  * Results:
  *	Returns 1 if character is a word character.
@@ -1569,12 +1635,15 @@ Tcl_UniCharIsUpper(ch)
  */
 
 int
-Tcl_UniCharIsWordChar(ch)
-    int ch;			/* Unicode character to test. */
+Tcl_UniCharIsWordChar(
+    int ch)			/* Unicode character to test. */
 {
-    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
-
-    return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1);
+#if TCL_UTF_MAX > 3
+    if (UNICODE_OUT_OF_RANGE(ch)) {
+	return 0;
+    }
+#endif
+    return ((WORD_BITS >> GetCategory(ch)) & 1);
 }
 
 /*
@@ -1583,17 +1652,16 @@ Tcl_UniCharIsWordChar(ch)
  * Tcl_UniCharCaseMatch --
  *
  *	See if a particular Unicode string matches a particular pattern.
- *	Allows case insensitivity.  This is the Unicode equivalent of
- *	the char* Tcl_StringCaseMatch.  The UniChar strings must be
- *	NULL-terminated.  This has no provision for counted UniChar
- *	strings, thus should not be used where NULLs are expected in the
- *	UniChar string.  Use TclUniCharMatch where possible.
+ *	Allows case insensitivity. This is the Unicode equivalent of the char*
+ *	Tcl_StringCaseMatch. The UniChar strings must be NULL-terminated.
+ *	This has no provision for counted UniChar strings, thus should not be
+ *	used where NULLs are expected in the UniChar string. Use
+ *	TclUniCharMatch where possible.
  *
  * Results:
- *	The return value is 1 if string matches pattern, and
- *	0 otherwise.  The matching operation permits the following
- *	special characters in the pattern: *?\[] (see the manual
- *	entry for details on what these mean).
+ *	The return value is 1 if string matches pattern, and 0 otherwise. The
+ *	matching operation permits the following special characters in the
+ *	pattern: *?\[] (see the manual entry for details on what these mean).
  *
  * Side effects:
  *	None.
@@ -1602,44 +1670,48 @@ Tcl_UniCharIsWordChar(ch)
  */
 
 int
-Tcl_UniCharCaseMatch(string, pattern, nocase)
-    CONST Tcl_UniChar *string;	/* Unicode String. */
-    CONST Tcl_UniChar *pattern;	/* Pattern, which may contain special
+Tcl_UniCharCaseMatch(
+    const Tcl_UniChar *uniStr,	/* Unicode String. */
+    const Tcl_UniChar *uniPattern,
+				/* Pattern, which may contain special
 				 * characters. */
-    int nocase;			/* 0 for case sensitive, 1 for insensitive */
+    int nocase)			/* 0 for case sensitive, 1 for insensitive */
 {
     Tcl_UniChar ch1, p;
-    
+
     while (1) {
-	p = *pattern;
-	
+	p = *uniPattern;
+
 	/*
-	 * See if we're at the end of both the pattern and the string.  If
-	 * so, we succeeded.  If we're at the end of the pattern but not at
-	 * the end of the string, we failed.
+	 * See if we're at the end of both the pattern and the string. If so,
+	 * we succeeded. If we're at the end of the pattern but not at the end
+	 * of the string, we failed.
 	 */
-	
+
 	if (p == 0) {
-	    return (*string == 0);
+	    return (*uniStr == 0);
 	}
-	if ((*string == 0) && (p != '*')) {
+	if ((*uniStr == 0) && (p != '*')) {
 	    return 0;
 	}
 
 	/*
-	 * Check for a "*" as the next pattern character.  It matches any
-	 * substring.  We handle this by skipping all the characters up to the
+	 * Check for a "*" as the next pattern character. It matches any
+	 * substring. We handle this by skipping all the characters up to the
 	 * next matching one in the pattern, and then calling ourselves
 	 * recursively for each postfix of string, until either we match or we
 	 * reach the end of the string.
 	 */
-	
+
 	if (p == '*') {
 	    /*
 	     * Skip all successive *'s in the pattern
 	     */
-	    while (*(++pattern) == '*') {}
-	    p = *pattern;
+
+	    while (*(++uniPattern) == '*') {
+		/* empty body */
+	    }
+	    p = *uniPattern;
 	    if (p == 0) {
 		return 1;
 	    }
@@ -1652,63 +1724,67 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)
 		 * quickly if the next char in the pattern isn't a special
 		 * character
 		 */
+
 		if ((p != '[') && (p != '?') && (p != '\\')) {
 		    if (nocase) {
-			while (*string && (p != *string)
-				&& (p != Tcl_UniCharToLower(*string))) {
-			    string++;
+			while (*uniStr && (p != *uniStr)
+				&& (p != Tcl_UniCharToLower(*uniStr))) {
+			    uniStr++;
 			}
 		    } else {
-			while (*string && (p != *string)) { string++; }
+			while (*uniStr && (p != *uniStr)) {
+			    uniStr++;
+			}
 		    }
 		}
-		if (Tcl_UniCharCaseMatch(string, pattern, nocase)) {
+		if (Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase)) {
 		    return 1;
 		}
-		if (*string == 0) {
+		if (*uniStr == 0) {
 		    return 0;
 		}
-		string++;
+		uniStr++;
 	    }
 	}
 
 	/*
-	 * Check for a "?" as the next pattern character.  It matches
-	 * any single character.
+	 * Check for a "?" as the next pattern character. It matches any
+	 * single character.
 	 */
 
 	if (p == '?') {
-	    pattern++;
-	    string++;
+	    uniPattern++;
+	    uniStr++;
 	    continue;
 	}
 
 	/*
-	 * Check for a "[" as the next pattern character.  It is followed
-	 * by a list of characters that are acceptable, or by a range
-	 * (two characters separated by "-").
+	 * Check for a "[" as the next pattern character. It is followed by a
+	 * list of characters that are acceptable, or by a range (two
+	 * characters separated by "-").
 	 */
-	
+
 	if (p == '[') {
 	    Tcl_UniChar startChar, endChar;
 
-	    pattern++;
-	    ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
-	    string++;
+	    uniPattern++;
+	    ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr);
+	    uniStr++;
 	    while (1) {
-		if ((*pattern == ']') || (*pattern == 0)) {
+		if ((*uniPattern == ']') || (*uniPattern == 0)) {
 		    return 0;
 		}
-		startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
-		pattern++;
-		if (*pattern == '-') {
-		    pattern++;
-		    if (*pattern == 0) {
+		startChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
+			: *uniPattern);
+		uniPattern++;
+		if (*uniPattern == '-') {
+		    uniPattern++;
+		    if (*uniPattern == 0) {
 			return 0;
 		    }
-		    endChar = (nocase ? Tcl_UniCharToLower(*pattern)
-			    : *pattern);
-		    pattern++;
+		    endChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
+			    : *uniPattern);
+		    uniPattern++;
 		    if (((startChar <= ch1) && (ch1 <= endChar))
 			    || ((endChar <= ch1) && (ch1 <= startChar))) {
 			/*
@@ -1720,42 +1796,43 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)
 		    break;
 		}
 	    }
-	    while (*pattern != ']') {
-		if (*pattern == 0) {
-		    pattern--;
+	    while (*uniPattern != ']') {
+		if (*uniPattern == 0) {
+		    uniPattern--;
 		    break;
 		}
-		pattern++;
+		uniPattern++;
 	    }
-	    pattern++;
+	    uniPattern++;
 	    continue;
 	}
 
 	/*
-	 * If the next pattern character is '\', just strip off the '\'
-	 * so we do exact matching on the character that follows.
+	 * If the next pattern character is '\', just strip off the '\' so we
+	 * do exact matching on the character that follows.
 	 */
 
 	if (p == '\\') {
-	    if (*(++pattern) == '\0') {
+	    if (*(++uniPattern) == '\0') {
 		return 0;
 	    }
 	}
 
 	/*
-	 * There's no special character.  Just make sure that the next
-	 * bytes of each string match.
+	 * There's no special character. Just make sure that the next bytes of
+	 * each string match.
 	 */
 
 	if (nocase) {
-	    if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {
+	    if (Tcl_UniCharToLower(*uniStr) !=
+		    Tcl_UniCharToLower(*uniPattern)) {
 		return 0;
 	    }
-	} else if (*string != *pattern) {
+	} else if (*uniStr != *uniPattern) {
 	    return 0;
 	}
-	string++;
-	pattern++;
+	uniStr++;
+	uniPattern++;
     }
 }
 
@@ -1765,15 +1842,14 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)
  * TclUniCharMatch --
  *
  *	See if a particular Unicode string matches a particular pattern.
- *	Allows case insensitivity.  This is the Unicode equivalent of the
- *	char* Tcl_StringCaseMatch.  This variant of Tcl_UniCharCaseMatch
- *	uses counted Strings, so embedded NULLs are allowed.
+ *	Allows case insensitivity. This is the Unicode equivalent of the char*
+ *	Tcl_StringCaseMatch. This variant of Tcl_UniCharCaseMatch uses counted
+ *	Strings, so embedded NULLs are allowed.
  *
  * Results:
- *	The return value is 1 if string matches pattern, and
- *	0 otherwise.  The matching operation permits the following
- *	special characters in the pattern: *?\[] (see the manual
- *	entry for details on what these mean).
+ *	The return value is 1 if string matches pattern, and 0 otherwise. The
+ *	matching operation permits the following special characters in the
+ *	pattern: *?\[] (see the manual entry for details on what these mean).
  *
  * Side effects:
  *	None.
@@ -1782,25 +1858,25 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)
  */
 
 int
-TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
-    CONST Tcl_UniChar *string;	/* Unicode String. */
-    int strLen;			/* length of String */
-    CONST Tcl_UniChar *pattern;	/* Pattern, which may contain special
+TclUniCharMatch(
+    const Tcl_UniChar *string,	/* Unicode String. */
+    int strLen,			/* Length of String */
+    const Tcl_UniChar *pattern,	/* Pattern, which may contain special
 				 * characters. */
-    int ptnLen;			/* length of Pattern */
-    int nocase;			/* 0 for case sensitive, 1 for insensitive */
+    int ptnLen,			/* Length of Pattern */
+    int nocase)			/* 0 for case sensitive, 1 for insensitive */
 {
-    CONST Tcl_UniChar *stringEnd, *patternEnd;
+    const Tcl_UniChar *stringEnd, *patternEnd;
     Tcl_UniChar p;
 
-    stringEnd  = string + strLen;
+    stringEnd = string + strLen;
     patternEnd = pattern + ptnLen;
 
     while (1) {
 	/*
-	 * See if we're at the end of both the pattern and the string.  If
-	 * so, we succeeded.  If we're at the end of the pattern but not at
-	 * the end of the string, we failed.
+	 * See if we're at the end of both the pattern and the string. If so,
+	 * we succeeded. If we're at the end of the pattern but not at the end
+	 * of the string, we failed.
 	 */
 
 	if (pattern == patternEnd) {
@@ -1812,18 +1888,21 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
 	}
 
 	/*
-	 * Check for a "*" as the next pattern character.  It matches any
-	 * substring.  We handle this by skipping all the characters up to the
+	 * Check for a "*" as the next pattern character. It matches any
+	 * substring. We handle this by skipping all the characters up to the
 	 * next matching one in the pattern, and then calling ourselves
 	 * recursively for each postfix of string, until either we match or we
 	 * reach the end of the string.
 	 */
-	
+
 	if (p == '*') {
 	    /*
-	     * Skip all successive *'s in the pattern
+	     * Skip all successive *'s in the pattern.
 	     */
-	    while (*(++pattern) == '*') {}
+
+	    while (*(++pattern) == '*') {
+		/* empty body */
+	    }
 	    if (pattern == patternEnd) {
 		return 1;
 	    }
@@ -1835,8 +1914,9 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
 		/*
 		 * Optimization for matching - cruise through the string
 		 * quickly if the next char in the pattern isn't a special
-		 * character
+		 * character.
 		 */
+
 		if ((p != '[') && (p != '?') && (p != '\\')) {
 		    if (nocase) {
 			while ((string < stringEnd) && (p != *string)
@@ -1861,8 +1941,8 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
 	}
 
 	/*
-	 * Check for a "?" as the next pattern character.  It matches
-	 * any single character.
+	 * Check for a "?" as the next pattern character. It matches any
+	 * single character.
 	 */
 
 	if (p == '?') {
@@ -1872,11 +1952,11 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
 	}
 
 	/*
-	 * Check for a "[" as the next pattern character.  It is followed
-	 * by a list of characters that are acceptable, or by a range
-	 * (two characters separated by "-").
+	 * Check for a "[" as the next pattern character. It is followed by a
+	 * list of characters that are acceptable, or by a range (two
+	 * characters separated by "-").
 	 */
-	
+
 	if (p == '[') {
 	    Tcl_UniChar ch1, startChar, endChar;
 
@@ -1920,8 +2000,8 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
 	}
 
 	/*
-	 * If the next pattern character is '\', just strip off the '\'
-	 * so we do exact matching on the character that follows.
+	 * If the next pattern character is '\', just strip off the '\' so we
+	 * do exact matching on the character that follows.
 	 */
 
 	if (p == '\\') {
@@ -1931,8 +2011,8 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
 	}
 
 	/*
-	 * There's no special character.  Just make sure that the next
-	 * bytes of each string match.
+	 * There's no special character. Just make sure that the next bytes of
+	 * each string match.
 	 */
 
 	if (nocase) {
@@ -1946,3 +2026,11 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)
 	pattern++;
     }
 }
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */