1 files changed, 346 insertions, 1402 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 2fa0e80..e5497a4 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -3,7 +3,7 @@
  *
  *	Routines for manipulating UTF-8 strings.
  *
- * Copyright © 1997-1998 Sun Microsystems, Inc.
+ * Copyright (c) 1997-1998 Sun Microsystems, Inc.
  *
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
@@ -26,7 +26,7 @@
 #define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \
 	| (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1<<OTHER_LETTER))
 
-#define CONTROL_BITS ((1 << CONTROL) | (1 << FORMAT))
+#define CONTROL_BITS ((1 << CONTROL) | (1 << FORMAT) | (1 << PRIVATE_USE))
 
 #define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER)
 
@@ -55,44 +55,46 @@
 #define UNICODE_SELF	0x80
 
 /*
- * The following structures are used when mapping between Unicode and
+ * The following structures are used when mapping between Unicode (UCS-2) and
  * UTF-8.
  */
 
-static const unsigned char totalBytes[256] = {
+static CONST unsigned char totalBytes[256] = {
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+#if TCL_UTF_MAX > 3
+    4,4,4,4,4,4,4,4,
+#else
+    1,1,1,1,1,1,1,1,
+#endif
+#if TCL_UTF_MAX > 4
+    5,5,5,5,
+#else
+    1,1,1,1,
+#endif
+#if TCL_UTF_MAX > 5
+    6,6,6,6
+#else
+    1,1,1,1
+#endif
 };
 
-static const unsigned char complete[256] = {
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */
-    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-/* End of "continuation byte section" */
-    2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
-};
-
 /*
  * Functions used only in this module.
  */
 
-static int		Invalid(const char *src);
+static int		UtfCount(int ch);
 
 /*
  *---------------------------------------------------------------------------
  *
- * TclUtfCount --
+ * UtfCount --
  *
  *	Find the number of bytes in the Utf character "ch".
  *
@@ -105,98 +107,44 @@ static int		Invalid(const char *src);
  *---------------------------------------------------------------------------
  */
 
-int
-TclUtfCount(
-    int ch)			/* The Unicode character whose size is returned. */
+INLINE static int
+UtfCount(
+    int ch)			/* The Tcl_UniChar whose size is returned. */
 {
-    if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) {
+    if ((ch > 0) && (ch < UNICODE_SELF)) {
 	return 1;
     }
     if (ch <= 0x7FF) {
 	return 2;
     }
-    if (((unsigned)(ch - 0x10000) <= 0xFFFFF)) {
+    if (ch <= 0xFFFF) {
+	return 3;
+    }
+#if TCL_UTF_MAX > 3
+    if (ch <= 0x1FFFFF) {
 	return 4;
     }
+    if (ch <= 0x3FFFFFF) {
+	return 5;
+    }
+    if (ch <= 0x7FFFFFFF) {
+	return 6;
+    }
+#endif
     return 3;
 }
-
-/*
- *---------------------------------------------------------------------------
- *
- * Invalid --
- *
- *	Given a pointer to a two-byte prefix of a well-formed UTF-8 byte
- *	sequence (a lead byte followed by a trail byte) this routine
- *	examines those two bytes to determine whether the sequence is
- *	invalid in UTF-8.  This might be because it is an overlong
- *	encoding, or because it encodes something out of the proper range.
- *
- *	Given a pointer to the bytes \xF8 or \xFC , this routine will
- *	try to read beyond the end of the "bounds" table.  Callers must
- *	prevent this.
- *
- *	Given a pointer to something else (an ASCII byte, a trail byte,
- *	or another byte	that can never begin a valid byte sequence such
- *	as \xF5) this routine returns false.  That makes the routine poorly
- *	named, as it does not detect and report all invalid sequences.
- *
- *	Callers have to take care that this routine does something useful
- *	for their needs.
- *
- * Results:
- *	A boolean.
- *---------------------------------------------------------------------------
- */
 
-static const unsigned char bounds[28] = {
-    0x80, 0x80,		/* \xC0 accepts \x80 only */
-    0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF,
-    0x80, 0xBF,		/* (\xC4 - \xDC) -- all sequences valid */
-    0xA0, 0xBF,	/* \xE0\x80 through \xE0\x9F are invalid prefixes */
-    0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, /* (\xE4 - \xEC) -- all valid */
-    0x90, 0xBF,	/* \xF0\x80 through \xF0\x8F are invalid prefixes */
-    0x80, 0x8F  /* \xF4\x90 and higher are invalid prefixes */
-};
-
-static int
-Invalid(
-    const char *src)	/* Points to lead byte of a UTF-8 byte sequence */
-{
-    unsigned char byte = UCHAR(*src);
-    int index;
-
-    if ((byte & 0xC3) == 0xC0) {
-	/* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */
-	index = (byte - 0xC0) >> 1;
-	if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) {
-	    /* Out of bounds - report invalid. */
-	    return 1;
-	}
-    }
-    return 0;
-}
-
 /*
  *---------------------------------------------------------------------------
  *
  * Tcl_UniCharToUtf --
  *
- *	Stores the given Tcl_UniChar as a sequence of UTF-8 bytes in the provided
- *	buffer. Equivalent to Plan 9 runetochar().
- *
- *	Surrogate pairs are handled as follows: When ch is a high surrogate,
- *	the first byte of the 4-byte UTF-8 sequence is stored in the buffer and
- *	the function returns 1. If the function is called again with a low
- *	surrogate and the same buffer, the remaining 3 bytes of the 4-byte
- *	UTF-8 sequence are produced.
- *
- *	If no low surrogate follows the high surrogate (which is actually illegal),
- *	calling Tcl_UniCharToUtf again with ch being -1 produces a 3-byte UTF-8
- *	sequence representing the high surrogate.
+ *	Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the
+ *	provided buffer. Equivalent to Plan 9 runetochar().
  *
  * Results:
- *	Returns the number of bytes stored into the buffer.
+ *	The return value is the number of bytes that were stored into the
+ *	buffer.
  *
  * Side effects:
  *	None.
@@ -204,84 +152,63 @@ Invalid(
  *---------------------------------------------------------------------------
  */
 
-Tcl_Size
+INLINE int
 Tcl_UniCharToUtf(
-    int ch,	/* The Tcl_UniChar to be stored in the
-		 * buffer.
-		 */
-    char *buf)	/* Buffer in which the UTF-8 representation of
-		 * ch is stored. Must be large enough to hold the UTF-8
-		 * character (at most 4 bytes).
-		 */
+    int ch,			/* The Tcl_UniChar to be stored in the
+				 * buffer. */
+    char *buf)			/* Buffer in which the UTF-8 representation of
+				 * the Tcl_UniChar is stored. Buffer must be
+				 * large enough to hold the UTF-8 character
+				 * (at most TCL_UTF_MAX bytes). */
 {
-    if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) {
+    if ((ch > 0) && (ch < UNICODE_SELF)) {
 	buf[0] = (char) ch;
 	return 1;
     }
     if (ch >= 0) {
 	if (ch <= 0x7FF) {
-	    buf[1] = (char) (0x80 | (0x3F & ch));
-	    buf[0] = (char) (0xC0 | (ch >> 6));
+	    buf[1] = (char) ((ch | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 6) | 0xC0);
 	    return 2;
 	}
 	if (ch <= 0xFFFF) {
-	    if ((ch & 0xF800) == 0xD800) {
-		if (ch & 0x0400) {
-		    /* Low surrogate */
-		    if (   (0x80 == (0xC0 & buf[0]))
-			&& (0    == (0xCF & buf[1]))) {
-			/* Previous Tcl_UniChar was a high surrogate, so combine */
-			buf[2]  = (char) (0x80 | (0x3F & ch));
-			buf[1] |= (char) (0x80 | (0x0F & (ch >> 6)));
-			return 3;
-		    }
-		    /* Previous Tcl_UniChar was not a high surrogate, so just output */
-		} else {
-		    /* High surrogate */
-
-		    /* Add 0x10000 to the raw number encoded in the surrogate
-		     * pair in order to get the code point.
-		    */
-		    ch += 0x40;
-
-		    /* Fill buffer with specific 3-byte (invalid) byte combination,
-		       so following low surrogate can recognize it and combine */
-		    buf[2] = (char) ((ch << 4) & 0x30);
-		    buf[1] = (char) (0x80 | (0x3F & (ch >> 2)));
-		    buf[0] = (char) (0xF0 | (0x07 & (ch >> 8)));
-		    return 1;
-		}
-	    }
-	    goto three;
+	three:
+	    buf[2] = (char) ((ch | 0x80) & 0xBF);
+	    buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 12) | 0xE0);
+	    return 3;
 	}
-	if (ch <= 0x10FFFF) {
-	    buf[3] = (char) (0x80 | (0x3F & ch));
-	    buf[2] = (char) (0x80 | (0x3F & (ch >> 6)));
-	    buf[1] = (char) (0x80 | (0x3F & (ch >> 12)));
-	    buf[0] = (char) (0xF0 |         (ch >> 18));
+
+#if TCL_UTF_MAX > 3
+	if (ch <= 0x1FFFFF) {
+	    buf[3] = (char) ((ch | 0x80) & 0xBF);
+	    buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
+	    buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 18) | 0xF0);
 	    return 4;
 	}
-    } else if (ch == -1) {
-	if (   (0x80 == (0xC0 & buf[0]))
-	    && (0    == (0xCF & buf[1]))
-	    && (0xF0 == (0xF8 & buf[-1]))) {
-	    ch = 0xD7C0
-		+ ((0x07 & buf[-1]) << 8)
-		+ ((0x3F & buf[0])  << 2)
-		+ ((0x30 & buf[1])  >> 4);
-	    buf[1]  = (char) (0x80 | (0x3F & ch));
-	    buf[0]  = (char) (0x80 | (0x3F & (ch >> 6)));
-	    buf[-1] = (char) (0xE0 | (ch >> 12));
-	    return 2;
+	if (ch <= 0x3FFFFFF) {
+	    buf[4] = (char) ((ch | 0x80) & 0xBF);
+	    buf[3] = (char) (((ch >> 6) | 0x80) & 0xBF);
+	    buf[2] = (char) (((ch >> 12) | 0x80) & 0xBF);
+	    buf[1] = (char) (((ch >> 18) | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 24) | 0xF8);
+	    return 5;
+	}
+	if (ch <= 0x7FFFFFFF) {
+	    buf[5] = (char) ((ch | 0x80) & 0xBF);
+	    buf[4] = (char) (((ch >> 6) | 0x80) & 0xBF);
+	    buf[3] = (char) (((ch >> 12) | 0x80) & 0xBF);
+	    buf[2] = (char) (((ch >> 18) | 0x80) & 0xBF);
+	    buf[1] = (char) (((ch >> 24) | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 30) | 0xFC);
+	    return 6;
 	}
+#endif
     }
 
     ch = 0xFFFD;
-three:
-    buf[2] = (char) (0x80 | (0x3F & ch));
-    buf[1] = (char) (0x80 | (0x3F & (ch >> 6)));
-    buf[0] = (char) (0xE0 |         (ch >> 12));
-    return 3;
+    goto three;
 }
 
 /*
@@ -302,41 +229,29 @@ three:
  *---------------------------------------------------------------------------
  */
 
-#undef Tcl_UniCharToUtfDString
 char *
 Tcl_UniCharToUtfDString(
-    const int *uniStr,	/* Unicode string to convert to UTF-8. */
-    Tcl_Size uniLength,		/* Length of Unicode string. Negative for nul
-    				 * terminated string */
+    CONST Tcl_UniChar *uniStr,	/* Unicode string to convert to UTF-8. */
+    int uniLength,		/* Length of Unicode string in Tcl_UniChars
+				 * (must be >= 0). */
     Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
 				 * to this previously initialized DString. */
 {
-    const int *w, *wEnd;
+    CONST Tcl_UniChar *w, *wEnd;
     char *p, *string;
-    Tcl_Size oldLength;
+    int oldLength;
 
     /*
-     * UTF-8 string length in bytes will be <= Unicode string length * 4.
+     * UTF-8 string length in bytes will be <= Unicode string length *
+     * TCL_UTF_MAX.
      */
 
-    if (uniStr == NULL) {
-	return NULL;
-    }
-    if (uniLength < 0) {
-	uniLength = 0;
-	w = uniStr;
-	while (*w != '\0') {
-	    uniLength++;
-	    w++;
-	}
-    }
     oldLength = Tcl_DStringLength(dsPtr);
-    Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4);
+    Tcl_DStringSetLength(dsPtr, (oldLength + uniLength + 1) * TCL_UTF_MAX);
     string = Tcl_DStringValue(dsPtr) + oldLength;
 
     p = string;
     wEnd = uniStr + uniLength;
-
     for (w = uniStr; w < wEnd; ) {
 	p += Tcl_UniCharToUtf(*w, p);
 	w++;
@@ -346,61 +261,6 @@ Tcl_UniCharToUtfDString(
     return string;
 }
 
-char *
-Tcl_Char16ToUtfDString(
-    const unsigned short *uniStr,/* Utf-16 string to convert to UTF-8. */
-    Tcl_Size uniLength,		/* Length of Utf-16 string. */
-    Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
-				 * to this previously initialized DString. */
-{
-    const unsigned short *w, *wEnd;
-    char *p, *string;
-    Tcl_Size oldLength;
-    int len = 1;
-
-    /*
-     * UTF-8 string length in bytes will be <= Utf16 string length * 3.
-     */
-
-    if (uniStr == NULL) {
-	return NULL;
-    }
-    if (uniLength < 0) {
-
-	uniLength = 0;
-	w = uniStr;
-	while (*w != '\0') {
-	    uniLength++;
-	    w++;
-	}
-    }
-    oldLength = Tcl_DStringLength(dsPtr);
-    Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 3);
-    string = Tcl_DStringValue(dsPtr) + oldLength;
-
-    p = string;
-    wEnd = uniStr + uniLength;
-
-    for (w = uniStr; w < wEnd; ) {
-	if (!len && ((*w & 0xFC00) != 0xDC00)) {
-	    /* Special case for handling high surrogates. */
-	    p += Tcl_UniCharToUtf(-1, p);
-	}
-	len = Tcl_UniCharToUtf(*w, p);
-	p += len;
-	if ((*w >= 0xD800) && (len < 3)) {
-	    len = 0; /* Indication that high surrogate was found */
-	}
-	w++;
-    }
-    if (!len) {
-	/* Special case for handling high surrogates. */
-	p += Tcl_UniCharToUtf(-1, p);
-    }
-    Tcl_DStringSetLength(dsPtr, oldLength + (p - string));
-
-    return string;
-}
 /*
  *---------------------------------------------------------------------------
  *
@@ -417,15 +277,6 @@ Tcl_Char16ToUtfDString(
  *	Tcl_UtfCharComplete() before calling this routine to ensure that
  *	enough bytes remain in the string.
  *
- *	Special handling of Surrogate pairs is done:
- *	For any UTF-8 string containing a character outside of the BMP, the
- *	first call to this function will fill *chPtr with the high surrogate
- *	and generate a return value of 1. Calling Tcl_UtfToUniChar again
- *	will produce the low surrogate and a return value of 3. Because *chPtr
- *	is used to remember whether the high surrogate is already produced, it
- *	is recommended to initialize the variable it points to as 0 before
- *	the first call to Tcl_UtfToUniChar is done.
- *
  * Results:
  *	*chPtr is filled with the Tcl_UniChar, and the return value is the
  *	number of bytes from the UTF-8 string that were consumed.
@@ -436,192 +287,89 @@ Tcl_Char16ToUtfDString(
  *---------------------------------------------------------------------------
  */
 
-static const unsigned short cp1252[32] = {
-  0x20AC,   0x81, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
-  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152,   0x8D, 0x017D,   0x8F,
-    0x90, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-   0x2DC, 0x2122, 0x0161, 0x203A, 0x0153,   0x9D, 0x017E, 0x0178
-};
-
-Tcl_Size
+int
 Tcl_UtfToUniChar(
-    const char *src,	/* The UTF-8 string. */
-    int *chPtr)/* Filled with the Unicode character represented by
+    register CONST char *src,	/* The UTF-8 string. */
+    register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
 				 * the UTF-8 string. */
 {
-    int byte;
+    register int byte;
 
     /*
-     * Unroll 1 to 4 byte UTF-8 sequences.
+     * Unroll 1 to 3 byte UTF-8 sequences, use loop to handle longer ones.
      */
 
     byte = *((unsigned char *) src);
     if (byte < 0xC0) {
 	/*
 	 * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
-	 * Treats naked trail bytes 0x80 to 0x9F as valid characters from
-	 * the cp1252 table. See: <https://en.wikipedia.org/wiki/UTF-8>
-	 * Also treats \0 and other naked trail bytes 0xA0 to 0xBF as valid
+	 * Also treats \0 and naked trail bytes 0x80 to 0xBF as valid
 	 * characters representing themselves.
 	 */
 
-	if ((unsigned)(byte-0x80) < (unsigned)0x20) {
-	    *chPtr = cp1252[byte-0x80];
-	} else {
-	    *chPtr = byte;
-	}
+	*chPtr = (Tcl_UniChar) byte;
 	return 1;
     } else if (byte < 0xE0) {
-	if ((byte != 0xC1) && ((src[1] & 0xC0) == 0x80)) {
+	if ((src[1] & 0xC0) == 0x80) {
 	    /*
 	     * Two-byte-character lead-byte followed by a trail-byte.
 	     */
 
-	    *chPtr = (((byte & 0x1F) << 6) | (src[1] & 0x3F));
-	    if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) {
-		return 2;
-	    }
+	    *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F));
+	    return 2;
 	}
 
 	/*
 	 * A two-byte-character lead-byte not followed by trail-byte
 	 * represents itself.
 	 */
+
+	*chPtr = (Tcl_UniChar) byte;
+	return 1;
     } else if (byte < 0xF0) {
 	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
 	    /*
 	     * Three-byte-character lead byte followed by two trail bytes.
 	     */
 
-	    *chPtr = (((byte & 0x0F) << 12)
+	    *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
 		    | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
-	    if (*chPtr > 0x7FF) {
-		return 3;
-	    }
+	    return 3;
 	}
 
 	/*
 	 * A three-byte-character lead-byte not followed by two trail-bytes
 	 * represents itself.
 	 */
-    } else if (byte < 0xF5) {
-	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
-	    /*
-	     * Four-byte-character lead byte followed by three trail bytes.
-	     */
-	    *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
-		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
-	    if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) {
-		return 4;
-	    }
-	}
-
-	/*
-	 * A four-byte-character lead-byte not followed by three trail-bytes
-	 * represents itself.
-	 */
-    }
 
-    *chPtr = byte;
-    return 1;
-}
-
-Tcl_Size
-Tcl_UtfToChar16(
-    const char *src,	/* The UTF-8 string. */
-    unsigned short *chPtr)/* Filled with the Tcl_UniChar represented by
-				 * the UTF-8 string. This could be a surrogate too. */
-{
-    unsigned short byte;
-
-    /*
-     * Unroll 1 to 4 byte UTF-8 sequences.
-     */
-
-    byte = UCHAR(*src);
-    if (byte < 0xC0) {
-	/*
-	 * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
-	 * Treats naked trail bytes 0x80 to 0x9F as valid characters from
-	 * the cp1252 table. See: <https://en.wikipedia.org/wiki/UTF-8>
-	 * Also treats \0 and other naked trail bytes 0xA0 to 0xBF as valid
-	 * characters representing themselves.
-	 */
-
-	/* If *chPtr contains a high surrogate (produced by a previous
-	 * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation
-	 * bytes, then we must produce a follow-up low surrogate. We only
-	 * do that if the high surrogate matches the bits we encounter.
-	 */
-	if (((byte & 0xC0) == 0x80)
-		&& ((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)
-		&& (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC))
-		&& ((src[1] & 0xF0) == (((*chPtr << 4) & 0x30) | 0x80))) {
-	    *chPtr = ((src[1] & 0x0F) << 6) + (src[2] & 0x3F) + 0xDC00;
-	    return 3;
-	}
-	if ((unsigned)(byte-0x80) < (unsigned)0x20) {
-	    *chPtr = cp1252[byte-0x80];
-	} else {
-	    *chPtr = byte;
-	}
+	*chPtr = (Tcl_UniChar) byte;
 	return 1;
-    } else if (byte < 0xE0) {
-	if ((byte != 0xC1) && ((src[1] & 0xC0) == 0x80)) {
-	    /*
-	     * Two-byte-character lead-byte followed by a trail-byte.
-	     */
-
-	    *chPtr = (((byte & 0x1F) << 6) | (src[1] & 0x3F));
-	    if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) {
-		return 2;
-	    }
-	}
-
-	/*
-	 * A two-byte-character lead-byte not followed by trail-byte
-	 * represents itself.
-	 */
-    } else if (byte < 0xF0) {
-	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
-	    /*
-	     * Three-byte-character lead byte followed by two trail bytes.
-	     */
-
-	    *chPtr = (((byte & 0x0F) << 12)
-		    | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
-	    if (*chPtr > 0x7FF) {
-		return 3;
-	    }
-	}
+    }
+#if TCL_UTF_MAX > 3
+    {
+	int ch, total, trail;
 
-	/*
-	 * A three-byte-character lead-byte not followed by two trail-bytes
-	 * represents itself.
-	 */
-    } else if (byte < 0xF5) {
-	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
-	    /*
-	     * Four-byte-character lead byte followed by at least two trail bytes.
-	     * We don't test the validity of 3th trail byte, see [ed29806ba]
-	     */
-	    Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
-		    | ((src[2] & 0x3F) >> 4)) - 0x40;
-	    if (high < 0x400) {
-		/* produce high surrogate, advance source pointer */
-		*chPtr = 0xD800 + high;
-		return 1;
-	    }
-	    /* out of range, < 0x10000 or > 0x10FFFF */
+	total = totalBytes[byte];
+	trail = total - 1;
+	if (trail > 0) {
+	    ch = byte & (0x3F >> trail);
+	    do {
+		src++;
+		if ((*src & 0xC0) != 0x80) {
+		    *chPtr = byte;
+		    return 1;
+		}
+		ch <<= 6;
+		ch |= (*src & 0x3F);
+		trail--;
+	    } while (trail > 0);
+	    *chPtr = ch;
+	    return total;
 	}
-
-	/*
-	 * A four-byte-character lead-byte not followed by three trail-bytes
-	 * represents itself.
-	 */
     }
+#endif
 
-    *chPtr = byte;
+    *chPtr = (Tcl_UniChar) byte;
     return 1;
 }
 
@@ -643,118 +391,42 @@ Tcl_UtfToChar16(
  *---------------------------------------------------------------------------
  */
 
-#undef Tcl_UtfToUniCharDString
-int *
+Tcl_UniChar *
 Tcl_UtfToUniCharDString(
-    const char *src,		/* UTF-8 string to convert to Unicode. */
-    Tcl_Size length,		/* Length of UTF-8 string in bytes, or -1 for
+    CONST char *src,		/* UTF-8 string to convert to Unicode. */
+    int length,			/* Length of UTF-8 string in bytes, or -1 for
 				 * strlen(). */
     Tcl_DString *dsPtr)		/* Unicode representation of string is
 				 * appended to this previously initialized
 				 * DString. */
 {
-    int ch = 0, *w, *wString;
-    const char *p;
-    Tcl_Size oldLength;
-    /* Pointer to the end of string. Never read endPtr[0] */
-    const char *endPtr = src + length;
-    /* Pointer to last byte where optimization still can be used */
-    const char *optPtr = endPtr - TCL_UTF_MAX;
-
-    if (src == NULL) {
-	return NULL;
-    }
-    if (length < 0) {
-	length = strlen(src);
-    }
-
-    /*
-     * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
-     * bytes.
-     */
-
-    oldLength = Tcl_DStringLength(dsPtr);
-
-    Tcl_DStringSetLength(dsPtr,
-	    oldLength + ((length + 1) * sizeof(int)));
-    wString = (int *) (Tcl_DStringValue(dsPtr) + oldLength);
-
-    w = wString;
-    p = src;
-    endPtr = src + length;
-    optPtr = endPtr - 4;
-    while (p <= optPtr) {
-	p += TclUtfToUniChar(p, &ch);
-	*w++ = ch;
-    }
-    while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) {
-	p += TclUtfToUniChar(p, &ch);
-	*w++ = ch;
-    }
-    while (p < endPtr) {
-	*w++ = UCHAR(*p++);
-    }
-    *w = '\0';
-    Tcl_DStringSetLength(dsPtr,
-	    oldLength + ((char *) w - (char *) wString));
-
-    return wString;
-}
+    Tcl_UniChar *w, *wString;
+    CONST char *p, *end;
+    int oldLength;
 
-unsigned short *
-Tcl_UtfToChar16DString(
-    const char *src,		/* UTF-8 string to convert to Unicode. */
-    Tcl_Size length,		/* Length of UTF-8 string in bytes, or -1 for
-				 * strlen(). */
-    Tcl_DString *dsPtr)		/* Unicode representation of string is
-				 * appended to this previously initialized
-				 * DString. */
-{
-    unsigned short ch = 0, *w, *wString;
-    const char *p;
-    Tcl_Size oldLength;
-    /* Pointer to the end of string. Never read endPtr[0] */
-    const char *endPtr = src + length;
-    /* Pointer to last byte where optimization still can be used */
-    const char *optPtr = endPtr - TCL_UTF_MAX;
-
-    if (src == NULL) {
-	return NULL;
-    }
     if (length < 0) {
 	length = strlen(src);
     }
 
     /*
-     * Unicode string length in WCHARs will be <= UTF-8 string length in
+     * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
      * bytes.
      */
 
     oldLength = Tcl_DStringLength(dsPtr);
-
     Tcl_DStringSetLength(dsPtr,
-	    oldLength + ((length + 1) * sizeof(unsigned short)));
-    wString = (unsigned short *) (Tcl_DStringValue(dsPtr) + oldLength);
+	    (int) ((oldLength + length + 1) * sizeof(Tcl_UniChar)));
+    wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);
 
     w = wString;
-    p = src;
-    endPtr = src + length;
-    optPtr = endPtr - 3;
-    while (p <= optPtr) {
-	p += Tcl_UtfToChar16(p, &ch);
-	*w++ = ch;
-    }
-    while (p < endPtr) {
-	if (Tcl_UtfCharComplete(p, endPtr-p)) {
-	    p += Tcl_UtfToChar16(p, &ch);
-	    *w++ = ch;
-	} else {
-	    *w++ = UCHAR(*p++);
-	}
+    end = src + length;
+    for (p = src; p < end; ) {
+	p += TclUtfToUniChar(p, w);
+	w++;
     }
     *w = '\0';
     Tcl_DStringSetLength(dsPtr,
-	    oldLength + ((char *) w - (char *) wString));
+	    (oldLength + ((char *) w - (char *) wString)));
 
     return wString;
 }
@@ -780,11 +452,14 @@ Tcl_UtfToChar16DString(
 
 int
 Tcl_UtfCharComplete(
-    const char *src,		/* String to check if first few bytes contain
+    CONST char *src,		/* String to check if first few bytes contain
 				 * a complete UTF-8 character. */
-    Tcl_Size length)		/* Length of above string in bytes. */
+    int length)			/* Length of above string in bytes. */
 {
-    return length >= complete[UCHAR(*src)];
+    int ch;
+
+    ch = *((unsigned char *) src);
+    return length >= totalBytes[ch];
 }
 
 /*
@@ -805,124 +480,59 @@ Tcl_UtfCharComplete(
  *---------------------------------------------------------------------------
  */
 
-Tcl_Size
-TclNumUtfChars(
-    const char *src,	/* The UTF-8 string to measure. */
-    Tcl_Size length)	/* The length of the string in bytes, or
-			 * negative value for strlen(src). */
-{
-    Tcl_UniChar ch = 0;
-    Tcl_Size i = 0;
-
-    if (length < 0) {
-	/* string is NUL-terminated, so TclUtfToUniChar calls are safe. */
-	while ((*src != '\0') && (i < INT_MAX)) {
-	    src += TclUtfToUniChar(src, &ch);
-	    i++;
-	}
-    } else {
-	/* Will return value between 0 and length. No overflow checks. */
-
-	/* Pointer to the end of string. Never read endPtr[0] */
-	const char *endPtr = src + length;
-	/* Pointer to last byte where optimization still can be used */
-	const char *optPtr = endPtr - 4;
-
-	/*
-	 * Optimize away the call in this loop. Justified because...
-	 * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr)
-	 * By initialization above (endPtr - optPtr) = TCL_UTF_MAX
-	 * So (endPtr - src) >= TCL_UTF_MAX, and passing that to
-	 * Tcl_UtfCharComplete we know will cause return of 1.
-	 */
-	while (src <= optPtr
-		/* && Tcl_UtfCharComplete(src, endPtr - src) */ ) {
-	    src += TclUtfToUniChar(src, &ch);
-	    i++;
-	}
-	/* Loop over the remaining string where call must happen */
-	while (src < endPtr) {
-	    if (Tcl_UtfCharComplete(src, endPtr - src)) {
-		src += TclUtfToUniChar(src, &ch);
-	    } else {
-		/*
-		 * src points to incomplete UTF-8 sequence
-		 * Treat first byte as character and count it
-		 */
-		src++;
-	    }
-	    i++;
-	}
-    }
-    return i;
-}
-
-#if !defined(TCL_NO_DEPRECATED)
-Tcl_Size
+int
 Tcl_NumUtfChars(
-    const char *src,	/* The UTF-8 string to measure. */
-    Tcl_Size length)	/* The length of the string in bytes, or
-			 * negative for strlen(src). */
+    register CONST char *src,	/* The UTF-8 string to measure. */
+    int length)			/* The length of the string in bytes, or -1
+				 * for strlen(src). */
 {
-    unsigned short ch = 0;
-    Tcl_Size i = 0;
+    Tcl_UniChar ch;
+    register Tcl_UniChar *chPtr = &ch;
+    register int i;
+
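+    /*
+     * The NUL-terminated and counted cases are implemented separately
+     * because that is faster. Since this is a time-sensitive function, the
+     * counted case also special-cases single-byte chars (lead byte below
+     * 0xC0), advancing past them without calling Tcl_UtfToUniChar.
+     */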
 
+    i = 0;
     if (length < 0) {
-	/* string is NUL-terminated, so TclUtfToUniChar calls are safe. */
-	while ((*src != '\0') && (i < INT_MAX)) {
-	    src += Tcl_UtfToChar16(src, &ch);
+	while (*src != '\0') {
+	    src += TclUtfToUniChar(src, chPtr);
 	    i++;
 	}
     } else {
-	/* Will return value between 0 and length. No overflow checks. */
+	register int n;
 
-	/* Pointer to the end of string. Never read endPtr[0] */
-	const char *endPtr = src + length;
-	/* Pointer to last byte where optimization still can be used */
-	const char *optPtr = endPtr - 4;
-
-	/*
-	 * Optimize away the call in this loop. Justified because...
-	 * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr)
-	 * By initialization above (endPtr - optPtr) = TCL_UTF_MAX
-	 * So (endPtr - src) >= TCL_UTF_MAX, and passing that to
-	 * Tcl_UtfCharComplete we know will cause return of 1.
-	 */
-	while (src <= optPtr
-		/* && Tcl_UtfCharComplete(src, endPtr - src) */ ) {
-	    src += Tcl_UtfToChar16(src, &ch);
-	    i++;
-	}
-	/* Loop over the remaining string where call must happen */
-	while (src < endPtr) {
-	    if (Tcl_UtfCharComplete(src, endPtr - src)) {
-		src += Tcl_UtfToChar16(src, &ch);
-	    } else {
-		/*
-		 * src points to incomplete UTF-8 sequence
-		 * Treat first byte as character and count it
-		 */
+	while (length > 0) {
+	    if (UCHAR(*src) < 0xC0) {
+		length--;
 		src++;
+	    } else {
+		n = Tcl_UtfToUniChar(src, chPtr);
+		length -= n;
+		src += n;
 	    }
 	    i++;
 	}
     }
     return i;
 }
-#endif
-
+
 /*
  *---------------------------------------------------------------------------
  *
  * Tcl_UtfFindFirst --
  *
- *	Returns a pointer to the first occurrence of the given Unicode character
- *	in the NULL-terminated UTF-8 string. The NULL terminator is considered
+ *	Returns a pointer to the first occurrence of the given Tcl_UniChar in
+ *	the NULL-terminated UTF-8 string. The NULL terminator is considered
  *	part of the UTF-8 string. Equivalent to Plan 9 utfrune().
  *
  * Results:
- *	As above. If the Unicode character does not exist in the given string,
- *	the return value is NULL.
+ *	As above. If the Tcl_UniChar does not exist in the given string, the
+ *	return value is NULL.
  *
  * Side effects:
  *	None.
@@ -930,14 +540,16 @@ Tcl_NumUtfChars(
  *---------------------------------------------------------------------------
  */
 
-const char *
+CONST char *
 Tcl_UtfFindFirst(
-    const char *src,		/* The UTF-8 string to be searched. */
-    int ch)			/* The Unicode character to search for. */
+    CONST char *src,		/* The UTF-8 string to be searched. */
+    int ch)			/* The Tcl_UniChar to search for. */
 {
-    while (1) {
-	int find, len = TclUtfToUniChar(src, &find);
+    int len;
+    Tcl_UniChar find;
 
+    while (1) {
+	len = TclUtfToUniChar(src, &find);
 	if (find == ch) {
 	    return src;
 	}
@@ -953,12 +565,12 @@ Tcl_UtfFindFirst(
  *
  * Tcl_UtfFindLast --
  *
- *	Returns a pointer to the last occurrence of the given Unicode character
- *	in the NULL-terminated UTF-8 string. The NULL terminator is considered
+ *	Returns a pointer to the last occurrence of the given Tcl_UniChar in
+ *	the NULL-terminated UTF-8 string. The NULL terminator is considered
  *	part of the UTF-8 string. Equivalent to Plan 9 utfrrune().
  *
  * Results:
- *	As above. If the Unicode character does not exist in the given string, the
+ *	As above. If the Tcl_UniChar does not exist in the given string, the
  *	return value is NULL.
  *
  * Side effects:
@@ -967,16 +579,18 @@ Tcl_UtfFindFirst(
  *---------------------------------------------------------------------------
  */
 
-const char *
+CONST char *
 Tcl_UtfFindLast(
-    const char *src,		/* The UTF-8 string to be searched. */
-    int ch)			/* The Unicode character to search for. */
+    CONST char *src,		/* The UTF-8 string to be searched. */
+    int ch)			/* The Tcl_UniChar to search for. */
 {
-    const char *last = NULL;
+    int len;
+    Tcl_UniChar find;
+    CONST char *last;
 
+    last = NULL;
     while (1) {
-	int find, len = TclUtfToUniChar(src, &find);
-
+	len = TclUtfToUniChar(src, &find);
 	if (find == ch) {
 	    last = src;
 	}
@@ -993,11 +607,9 @@ Tcl_UtfFindLast(
  *
  * Tcl_UtfNext --
  *
- *	Given a pointer to some location in a UTF-8 string, Tcl_UtfNext
- *	returns a pointer to the next UTF-8 character in the string.
- *	The caller must not ask for the next character after the last
- *	character in the string if the string is not terminated by a null
- *	character.
+ *	Given a pointer to some current location in a UTF-8 string, move
+ *	forward one character. The caller must ensure that they are not asking
+ *	for the next character after the last character in the string.
  *
  * Results:
  *	The return value is the pointer to the next character in the UTF-8
@@ -1009,48 +621,13 @@ Tcl_UtfFindLast(
  *---------------------------------------------------------------------------
  */
 
-const char *
+CONST char *
 Tcl_UtfNext(
-    const char *src)		/* The current location in the string. */
+    CONST char *src)		/* The current location in the string. */
 {
-    int left;
-    const char *next;
-
-    if (((*src) & 0xC0) == 0x80) {
-	/* Continuation byte, so we start 'inside' a (possible valid) UTF-8
-	 * sequence. Since we are not allowed to access src[-1], we cannot
-	 * check if the sequence is actually valid, the best we can do is
-	 * just assume it is valid and locate the end. */
-	if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) {
-	    ++src;
-	}
-	return src;
-    }
+    Tcl_UniChar ch;
 
-    left = totalBytes[UCHAR(*src)];
-    next = src + 1;
-    while (--left) {
-	if ((*next & 0xC0) != 0x80) {
-	    /*
-	     * src points to non-trail byte; We ran out of trail bytes
-	     * before the needs of the lead byte were satisfied.
-	     * Let the (malformed) lead byte alone be a character
-	     */
-	    return src + 1;
-	}
-	next++;
-    }
-    /*
-     * Call Invalid() here only if required conditions are met:
-     *    src[0] is known a lead byte.
-     *    src[1] is known a trail byte.
-     * Especially important to prevent calls when src[0] == '\xF8' or '\xFC'
-     * See tests utf-6.37 through utf-6.43 through valgrind or similar tool.
-     */
-    if ((next == src + 1) || Invalid(src)) {
-	return src + 1;
-    }
-    return next;
+    return src + TclUtfToUniChar(src, &ch);
 }
 
 /*
@@ -1074,96 +651,34 @@ Tcl_UtfNext(
  *---------------------------------------------------------------------------
  */
 
-const char *
+CONST char *
 Tcl_UtfPrev(
-    const char *src,		/* A location in a UTF-8 string. */
-    const char *start)		/* Pointer to the beginning of the string */
+    CONST char *src,		/* The current location in the string. */
+    CONST char *start)		/* Pointer to the beginning of the string, to
+				 * avoid going backwards too far. */
 {
-    int trailBytesSeen = 0;	/* How many trail bytes have been verified? */
-    const char *fallback = src - 1;
-				/* If we cannot find a lead byte that might
-				 * start a prefix of a valid UTF byte sequence,
-				 * we will fallback to a one-byte back step */
-    const char *look = fallback;
-				/* Start search at the fallback position */
-
-    /* Quick boundary case exit. */
-    if (fallback <= start) {
-	return start;
-    }
-
-    do {
-	unsigned char byte = UCHAR(look[0]);
-
+    CONST char *look;
+    int i, byte;
+
+    src--;
+    look = src;
+    for (i = 0; i < TCL_UTF_MAX; i++) {
+	if (look < start) {
+	    if (src < start) {
+		src = start;
+	    }
+	    break;
+	}
+	byte = *((unsigned char *) look);
 	if (byte < 0x80) {
-	    /*
-	     * Single byte character. Either this is a correct previous
-	     * character, or it is followed by at least one trail byte
-	     * which indicates a malformed sequence. In either case the
-	     * correct result is to return the fallback.
-	     */
-	    return fallback;
+	    break;
 	}
 	if (byte >= 0xC0) {
-	    /* Non-trail byte; May be multibyte lead. */
-
-	    if ((trailBytesSeen == 0)
-		/*
-		 * We've seen no trailing context to use to check
-		 * anything. From what we know, this non-trail byte
-		 * is a prefix of a previous character, and accepting
-		 * it (the fallback) is correct.
-		 */
-
-		    || (trailBytesSeen >= totalBytes[byte])) {
-		/*
-		 * That is, (1 + trailBytesSeen > needed).
-		 * We've examined more bytes than needed to complete
-		 * this lead byte. No matter about well-formedness or
-		 * validity, the sequence starting with this lead byte
-		 * will never include the fallback location, so we must
-		 * return the fallback location. See test utf-7.17
-		 */
-		return fallback;
-	    }
-
-	    /*
-	     * trailBytesSeen > 0, so we can examine look[1] safely.
-	     * Use that capability to screen out invalid sequences.
-	     */
-
-	    if (Invalid(look)) {
-		/* Reject */
-		return fallback;
-	    }
-	    return (const char *)look;
+	    return look;
 	}
-
-	/* We saw a trail byte. */
-	trailBytesSeen++;
-
-	if ((const char *)look == start) {
-	    /*
-	     * Do not read before the start of the string
-	     *
-	     * If we get here, we've examined bytes at every location
-	     * >= start and < src and all of them are trail bytes,
-	     * including (*start).  We need to return our fallback
-	     * and exit this loop before we run past the start of the string.
-	     */
-	    return fallback;
-	}
-
-	/* Continue the search backwards... */
 	look--;
-    } while (trailBytesSeen < 4);
-
-    /*
-     * We've seen 4 trail bytes, so we know there will not be a
-     * properly formed byte sequence to find, and we can stop looking,
-     * accepting the fallback.
-     */
-    return fallback;
+    }
+    return src;
 }
 
 /*
@@ -1183,27 +698,18 @@ Tcl_UtfPrev(
  *---------------------------------------------------------------------------
  */
 
-int
+Tcl_UniChar
 Tcl_UniCharAtIndex(
-    const char *src,	/* The UTF-8 string to dereference. */
-    Tcl_Size index)	/* The position of the desired character. */
+    register CONST char *src,	/* The UTF-8 string to dereference. */
+    register int index)		/* The position of the desired character. */
 {
-    unsigned short ch = 0;
-    int i = 0;
+    Tcl_UniChar ch;
 
-    if (index < 0) {
-	return -1;
-    }
-    while (index-- > 0) {
-	i = Tcl_UtfToChar16(src, &ch);
-	src += i;
-    }
-    if ((ch >= 0xD800) && (i < 3)) {
-	/* Index points at character following high Surrogate */
-	return -1;
+    while (index >= 0) {
+	index--;
+	src += TclUtfToUniChar(src, &ch);
     }
-    TclUtfToUniChar(src, &i);
-    return i;
+    return ch;
 }
 
 /*
@@ -1223,41 +729,20 @@ Tcl_UniCharAtIndex(
  *---------------------------------------------------------------------------
  */
 
-const char *
-TclUtfAtIndex(
-    const char *src,	/* The UTF-8 string. */
-    Tcl_Size index)	/* The position of the desired character. */
+CONST char *
+Tcl_UtfAtIndex(
+    register CONST char *src,	/* The UTF-8 string. */
+    register int index)		/* The position of the desired character. */
 {
-    Tcl_UniChar ch = 0;
+    Tcl_UniChar ch;
 
-    while (index-- > 0) {
+    while (index > 0) {
+	index--;
 	src += TclUtfToUniChar(src, &ch);
     }
     return src;
 }
 
-#if !defined(TCL_NO_DEPRECATED)
-const char *
-Tcl_UtfAtIndex(
-    const char *src,	/* The UTF-8 string. */
-    Tcl_Size index)	/* The position of the desired character. */
-{
-    unsigned short ch = 0;
-    Tcl_Size len = 0;
-
-    if (index > 0) {
-	while (index--) {
-	    src += (len = Tcl_UtfToChar16(src, &ch));
-	}
-	if ((ch >= 0xD800) && (len < 3)) {
-	    /* Index points at character following high Surrogate */
-	    src += Tcl_UtfToChar16(src, &ch);
-	}
-    }
-    return src;
-}
-#endif
-
 /*
  *---------------------------------------------------------------------------
  *
@@ -1267,7 +752,7 @@ Tcl_UtfAtIndex(
  *
  * Results:
  *	Stores the bytes represented by the backslash sequence in dst and
- *	returns the number of bytes written to dst. At most 4 bytes
+ *	returns the number of bytes written to dst. At most TCL_UTF_MAX bytes
  *	are written to dst; dst must have been large enough to accept those
  *	bytes. If readPtr isn't NULL then it is filled in with a count of the
  *	number of bytes in the backslash sequence.
@@ -1284,9 +769,9 @@ Tcl_UtfAtIndex(
  *---------------------------------------------------------------------------
  */
 
-Tcl_Size
+int
 Tcl_UtfBackslash(
-    const char *src,		/* Points to the backslash character of a
+    CONST char *src,		/* Points to the backslash character of a
 				 * backslash sequence. */
     int *readPtr,		/* Fill in with number of characters read from
 				 * src, unless NULL. */
@@ -1294,7 +779,7 @@ Tcl_UtfBackslash(
 				 * backslash sequence. */
 {
 #define LINE_LENGTH 128
-    Tcl_Size numRead;
+    int numRead;
     int result;
 
     result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst);
@@ -1303,7 +788,7 @@ Tcl_UtfBackslash(
 	 * We ate a whole line. Pay the price of a strlen()
 	 */
 
-	result = TclParseBackslash(src, strlen(src), &numRead, dst);
+	result = TclParseBackslash(src, (int)strlen(src), &numRead, dst);
     }
     if (readPtr != NULL) {
 	*readPtr = numRead;
@@ -1329,13 +814,13 @@ Tcl_UtfBackslash(
  *----------------------------------------------------------------------
  */
 
-Tcl_Size
+int
 Tcl_UtfToUpper(
     char *str)			/* String to convert in place. */
 {
-    int ch, upChar;
+    Tcl_UniChar ch, upChar;
     char *src, *dst;
-    Tcl_Size len;
+    int bytes;
 
     /*
      * Iterate over the string until we hit the terminating null.
@@ -1343,7 +828,7 @@ Tcl_UtfToUpper(
 
     src = dst = str;
     while (*src) {
-	len = TclUtfToUniChar(src, &ch);
+	bytes = TclUtfToUniChar(src, &ch);
 	upChar = Tcl_UniCharToUpper(ch);
 
 	/*
@@ -1352,13 +837,13 @@ Tcl_UtfToUpper(
 	 * char to dst if its size is <= the original char.
 	 */
 
-	if ((len < TclUtfCount(upChar)) || ((upChar & ~0x7FF) == 0xD800)) {
-	    memmove(dst, src, len);
-	    dst += len;
+	if (bytes < UtfCount(upChar)) {
+	    memcpy(dst, src, (size_t) bytes);
+	    dst += bytes;
 	} else {
 	    dst += Tcl_UniCharToUtf(upChar, dst);
 	}
-	src += len;
+	src += bytes;
     }
     *dst = '\0';
     return (dst - str);
@@ -1382,13 +867,13 @@ Tcl_UtfToUpper(
  *----------------------------------------------------------------------
  */
 
-Tcl_Size
+int
 Tcl_UtfToLower(
     char *str)			/* String to convert in place. */
 {
-    int ch, lowChar;
+    Tcl_UniChar ch, lowChar;
     char *src, *dst;
-    Tcl_Size len;
+    int bytes;
 
     /*
      * Iterate over the string until we hit the terminating null.
@@ -1396,7 +881,7 @@ Tcl_UtfToLower(
 
     src = dst = str;
     while (*src) {
-	len = TclUtfToUniChar(src, &ch);
+	bytes = TclUtfToUniChar(src, &ch);
 	lowChar = Tcl_UniCharToLower(ch);
 
 	/*
@@ -1405,13 +890,13 @@ Tcl_UtfToLower(
 	 * char to dst if its size is <= the original char.
 	 */
 
-	if ((len < TclUtfCount(lowChar)) || ((lowChar & ~0x7FF) == 0xD800)) {
-	    memmove(dst, src, len);
-	    dst += len;
+	if (bytes < UtfCount(lowChar)) {
+	    memcpy(dst, src, (size_t) bytes);
+	    dst += bytes;
 	} else {
 	    dst += Tcl_UniCharToUtf(lowChar, dst);
 	}
-	src += len;
+	src += bytes;
     }
     *dst = '\0';
     return (dst - str);
@@ -1436,13 +921,13 @@ Tcl_UtfToLower(
  *----------------------------------------------------------------------
  */
 
-Tcl_Size
+int
 Tcl_UtfToTitle(
     char *str)			/* String to convert in place. */
 {
-    int ch, titleChar, lowChar;
+    Tcl_UniChar ch, titleChar, lowChar;
     char *src, *dst;
-    Tcl_Size len;
+    int bytes;
 
     /*
      * Capitalize the first character and then lowercase the rest of the
@@ -1452,32 +937,28 @@ Tcl_UtfToTitle(
     src = dst = str;
 
     if (*src) {
-	len = TclUtfToUniChar(src, &ch);
+	bytes = TclUtfToUniChar(src, &ch);
 	titleChar = Tcl_UniCharToTitle(ch);
 
-	if ((len < TclUtfCount(titleChar)) || ((titleChar & ~0x7FF) == 0xD800)) {
-	    memmove(dst, src, len);
-	    dst += len;
+	if (bytes < UtfCount(titleChar)) {
+	    memcpy(dst, src, (size_t) bytes);
+	    dst += bytes;
 	} else {
 	    dst += Tcl_UniCharToUtf(titleChar, dst);
 	}
-	src += len;
+	src += bytes;
     }
     while (*src) {
-	len = TclUtfToUniChar(src, &ch);
-	lowChar = ch;
-	/* Special exception for Georgian Asomtavruli chars, no titlecase. */
-	if ((unsigned)(lowChar - 0x1C90) >= 0x30) {
-	    lowChar = Tcl_UniCharToLower(lowChar);
-	}
+	bytes = TclUtfToUniChar(src, &ch);
+	lowChar = Tcl_UniCharToLower(ch);
 
-	if ((len < TclUtfCount(lowChar)) || ((lowChar & ~0x7FF) == 0xD800)) {
-	    memmove(dst, src, len);
-	    dst += len;
+	if (bytes < UtfCount(lowChar)) {
+	    memcpy(dst, src, (size_t) bytes);
+	    dst += bytes;
 	} else {
 	    dst += Tcl_UniCharToUtf(lowChar, dst);
 	}
-	src += len;
+	src += bytes;
     }
     *dst = '\0';
     return (dst - str);
@@ -1502,8 +983,8 @@ Tcl_UtfToTitle(
 
 int
 TclpUtfNcmp2(
-    const char *cs,		/* UTF string to compare to ct. */
-    const char *ct,		/* UTF string cs is compared to. */
+    CONST char *cs,		/* UTF string to compare to ct. */
+    CONST char *ct,		/* UTF string cs is compared to. */
     unsigned long numBytes)	/* Number of *bytes* to compare. */
 {
     /*
@@ -1512,7 +993,7 @@ TclpUtfNcmp2(
      * fine in the strcmp manner.
      */
 
-    int result = 0;
+    register int result = 0;
 
     for ( ; numBytes != 0; numBytes--, cs++, ct++) {
 	if (*cs != *ct) {
@@ -1535,8 +1016,8 @@ TclpUtfNcmp2(
  *
  * Tcl_UtfNcmp --
  *
- *	Compare at most numChars UTF-16 chars of string cs to string ct. Both cs
- *	and ct are assumed to be at least numChars UTF-16 chars long.
+ *	Compare at most numChars UTF chars of string cs to string ct. Both cs
+ *	and ct are assumed to be at least numChars UTF chars long.
  *
  * Results:
  *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
@@ -1547,89 +1028,17 @@ TclpUtfNcmp2(
  *----------------------------------------------------------------------
  */
 
-#if !defined(TCL_NO_DEPRECATED)
 int
 Tcl_UtfNcmp(
-    const char *cs,		/* UTF string to compare to ct. */
-    const char *ct,		/* UTF string cs is compared to. */
-    unsigned long numChars)	/* Number of UTF-16 chars to compare. */
-{
-    unsigned short ch1 = 0, ch2 = 0;
-
-    /*
-     * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the
-     * pair of bytes 0xC0,0x80) is larger than byte representation of \u0001
-     * (the byte 0x01.)
-     */
-
-    while (numChars-- > 0) {
-	/*
-	 * n must be interpreted as UTF-16 chars, not bytes. This should be called
-	 * only when both strings are of at least n UTF-16 chars long (no need for \0
-	 * check)
-	 */
-
-	cs += Tcl_UtfToChar16(cs, &ch1);
-	ct += Tcl_UtfToChar16(ct, &ch2);
-	if (ch1 != ch2) {
-	    /* Surrogates always report higher than non-surrogates */
-	    if (((ch1 & 0xFC00) == 0xD800)) {
-	    if ((ch2 & 0xFC00) != 0xD800) {
-		return ch1;
-	    }
-	    } else if ((ch2 & 0xFC00) == 0xD800) {
-		return -ch2;
-	    }
-	    return (ch1 - ch2);
-	}
-    }
-    return 0;
-}
-#endif /* TCL_NO_DEPRECATED */
-
-int
-TclUtfNcmp(
-    const char *cs,		/* UTF string to compare to ct. */
-    const char *ct,		/* UTF string cs is compared to. */
-    size_t numChars)	/* Number of chars to compare. */
+    CONST char *cs,		/* UTF string to compare to ct. */
+    CONST char *ct,		/* UTF string cs is compared to. */
+    unsigned long numChars)	/* Number of UTF chars to compare. */
 {
-    Tcl_UniChar ch1 = 0, ch2 = 0;
+    Tcl_UniChar ch1, ch2;
 
     /*
      * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the
-     * pair of bytes 0xC0,0x80) is larger than byte representation of \u0001
-     * (the byte 0x01.)
-     */
-
-    while (numChars-- > 0) {
-	/*
-	 * n must be interpreted as chars, not bytes. This should be called
-	 * only when both strings are of at least n chars long (no need for \0
-	 * check)
-	 */
-
-	cs += TclUtfToUniChar(cs, &ch1);
-	ct += TclUtfToUniChar(ct, &ch2);
-	if (ch1 != ch2) {
-	    return (ch1 - ch2);
-	}
-    }
-    return 0;
-}
-
-int
-TclUtfNmemcmp(
-    const void *csPtr,		/* UTF string to compare to ct. */
-    const void *ctPtr,		/* UTF string cs is compared to. */
-    size_t numChars)	/* Number of chars to compare. */
-{
-    Tcl_UniChar ch1 = 0, ch2 = 0;
-    const char *cs = (const char *)csPtr;
-    const char *ct = (const char *)ctPtr;
-
-    /*
-     * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the
-     * pair of bytes 0xC0,0x80) is larger than byte representation of \u0001
+     * pair of bytes 0xc0,0x80) is larger than byte representation of \u0001
      * (the byte 0x01.)
      */
 
@@ -1654,8 +1063,8 @@ TclUtfNmemcmp(
  *
  * Tcl_UtfNcasecmp --
  *
- *	Compare at most numChars UTF-16 chars of string cs to string ct case
- *	insensitive. Both cs and ct are assumed to be at least numChars UTF-16
+ *	Compare at most numChars UTF chars of string cs to string ct case
+ *	insensitive. Both cs and ct are assumed to be at least numChars UTF
  *	chars long.
  *
  * Results:
@@ -1667,81 +1076,13 @@ TclUtfNmemcmp(
  *----------------------------------------------------------------------
  */
 
-#if !defined(TCL_NO_DEPRECATED)
 int
 Tcl_UtfNcasecmp(
-    const char *cs,		/* UTF string to compare to ct. */
-    const char *ct,		/* UTF string cs is compared to. */
-    unsigned long numChars)	/* Number of UTF-16 chars to compare. */
+    CONST char *cs,		/* UTF string to compare to ct. */
+    CONST char *ct,		/* UTF string cs is compared to. */
+    unsigned long numChars)	/* Number of UTF chars to compare. */
 {
-    unsigned short ch1 = 0, ch2 = 0;
-
-    while (numChars-- > 0) {
-	/*
-	 * n must be interpreted as UTF-16 chars, not bytes.
-	 * This should be called only when both strings are of
-	 * at least n UTF-16 chars long (no need for \0 check)
-	 */
-	cs += Tcl_UtfToChar16(cs, &ch1);
-	ct += Tcl_UtfToChar16(ct, &ch2);
-	if (ch1 != ch2) {
-	    /* Surrogates always report higher than non-surrogates */
-	    if (((ch1 & 0xFC00) == 0xD800)) {
-	    if ((ch2 & 0xFC00) != 0xD800) {
-		return ch1;
-	    }
-	    } else if ((ch2 & 0xFC00) == 0xD800) {
-		return -ch2;
-	    }
-	    ch1 = Tcl_UniCharToLower(ch1);
-	    ch2 = Tcl_UniCharToLower(ch2);
-	    if (ch1 != ch2) {
-		return (ch1 - ch2);
-	    }
-	}
-    }
-    return 0;
-}
-#endif /* TCL_NO_DEPRECATED */
-
-
-int
-TclUtfNcasecmp(
-    const char *cs,		/* UTF string to compare to ct. */
-    const char *ct,		/* UTF string cs is compared to. */
-    size_t numChars)	/* Number of chars to compare. */
-{
-    Tcl_UniChar ch1 = 0, ch2 = 0;
-
-    while (numChars-- > 0) {
-	/*
-	 * n must be interpreted as chars, not bytes.
-	 * This should be called only when both strings are of
-	 * at least n chars long (no need for \0 check)
-	 */
-	cs += TclUtfToUniChar(cs, &ch1);
-	ct += TclUtfToUniChar(ct, &ch2);
-	if (ch1 != ch2) {
-	    ch1 = Tcl_UniCharToLower(ch1);
-	    ch2 = Tcl_UniCharToLower(ch2);
-	    if (ch1 != ch2) {
-		return (ch1 - ch2);
-	    }
-	}
-    }
-    return 0;
-}
-
-int
-TclUtfNcasememcmp(
-    const void *csPtr,		/* UTF string to compare to ct. */
-    const void *ctPtr,		/* UTF string cs is compared to. */
-    size_t numChars)	/* Number of chars to compare. */
-{
-    const char *cs = (const char *)csPtr;
-    const char *ct = (const char *)ctPtr;
-    Tcl_UniChar ch1 = 0, ch2 = 0;
-
+    Tcl_UniChar ch1, ch2;
     while (numChars-- > 0) {
 	/*
 	 * n must be interpreted as chars, not bytes.
@@ -1760,47 +1101,11 @@ TclUtfNcasememcmp(
     }
     return 0;
 }
-
-/*
- *----------------------------------------------------------------------
- *
- * Tcl_UtfCmp --
- *
- *	Compare UTF chars of string cs to string ct case sensitively.
- *	Replacement for strcmp in Tcl core, in places where UTF-8 should
- *	be handled.
- *
- * Results:
- *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
- *
- * Side effects:
- *	None.
- *
- *----------------------------------------------------------------------
- */
-
-int
-TclUtfCmp(
-    const char *cs,		/* UTF string to compare to ct. */
-    const char *ct)		/* UTF string cs is compared to. */
-{
-    Tcl_UniChar ch1 = 0, ch2 = 0;
-
-    while (*cs && *ct) {
-	cs += TclUtfToUniChar(cs, &ch1);
-	ct += TclUtfToUniChar(ct, &ch2);
-	if (ch1 != ch2) {
-	    return ch1 - ch2;
-	}
-    }
-    return UCHAR(*cs) - UCHAR(*ct);
-}
-
 
 /*
  *----------------------------------------------------------------------
  *
  * TclUtfCasecmp --
  *
  *	Compare UTF chars of string cs to string ct case insensitively.
  *	Replacement for strcasecmp in Tcl core, in places where UTF-8 should
@@ -1817,12 +1122,12 @@ TclUtfCmp(
 
 int
 TclUtfCasecmp(
-    const char *cs,		/* UTF string to compare to ct. */
-    const char *ct)		/* UTF string cs is compared to. */
+    CONST char *cs,		/* UTF string to compare to ct. */
+    CONST char *ct)		/* UTF string cs is compared to. */
 {
-    Tcl_UniChar ch1 = 0, ch2 = 0;
-
     while (*cs && *ct) {
+	Tcl_UniChar ch1, ch2;
+
 	cs += TclUtfToUniChar(cs, &ch1);
 	ct += TclUtfToUniChar(ct, &ch2);
 	if (ch1 != ch2) {
@@ -1853,19 +1158,16 @@ TclUtfCasecmp(
  *----------------------------------------------------------------------
  */
 
-int
+Tcl_UniChar
 Tcl_UniCharToUpper(
     int ch)			/* Unicode character to convert. */
 {
-    if (!UNICODE_OUT_OF_RANGE(ch)) {
-	int info = GetUniCharInfo(ch);
+    int info = GetUniCharInfo(ch);
 
-	if (GetCaseType(info) & 0x04) {
-	    ch -= GetDelta(info);
-	}
+    if (GetCaseType(info) & 0x04) {
+	ch -= GetDelta(info);
     }
-    /* Clear away extension bits, if any */
-    return ch & 0x1FFFFF;
+    return (Tcl_UniChar) ch;
 }
 
 /*
@@ -1884,22 +1186,18 @@ Tcl_UniCharToUpper(
  *----------------------------------------------------------------------
  */
 
-int
+Tcl_UniChar
 Tcl_UniCharToLower(
     int ch)			/* Unicode character to convert. */
 {
-    if (!UNICODE_OUT_OF_RANGE(ch)) {
-	int info = GetUniCharInfo(ch);
-	int mode = GetCaseType(info);
+    int info = GetUniCharInfo(ch);
 
-	if ((mode & 0x02) && (mode != 0x7)) {
-	    ch += GetDelta(info);
-	}
+    if (GetCaseType(info) & 0x02) {
+	ch += GetDelta(info);
     }
-    /* Clear away extension bits, if any */
-    return ch & 0x1FFFFF;
+    return (Tcl_UniChar) ch;
 }
-
+
 /*
  *----------------------------------------------------------------------
  *
@@ -1916,63 +1214,28 @@ Tcl_UniCharToLower(
  *----------------------------------------------------------------------
  */
 
-int
+Tcl_UniChar
 Tcl_UniCharToTitle(
     int ch)			/* Unicode character to convert. */
 {
-    if (!UNICODE_OUT_OF_RANGE(ch)) {
-	int info = GetUniCharInfo(ch);
-	int mode = GetCaseType(info);
+    int info = GetUniCharInfo(ch);
+    int mode = GetCaseType(info);
 
-	if (mode & 0x1) {
-	    /*
-	     * Subtract or add one depending on the original case.
-	     */
+    if (mode & 0x1) {
+	/*
+	 * Subtract or add one depending on the original case.
+	 */
 
-	    if (mode != 0x7) {
-		ch += ((mode & 0x4) ? -1 : 1);
-	    }
-	} else if (mode == 0x4) {
-	    ch -= GetDelta(info);
-	}
+	ch += ((mode & 0x4) ? -1 : 1);
+    } else if (mode == 0x4) {
+	ch -= GetDelta(info);
     }
-    /* Clear away extension bits, if any */
-    return ch & 0x1FFFFF;
+    return (Tcl_UniChar) ch;
 }
 
 /*
  *----------------------------------------------------------------------
  *
- * Tcl_Char16Len --
- *
- *	Find the length of a UniChar string. The str input must be null
- *	terminated.
- *
- * Results:
- *	Returns the length of str in UniChars (not bytes).
- *
- * Side effects:
- *	None.
- *
- *----------------------------------------------------------------------
- */
-
-Tcl_Size
-Tcl_Char16Len(
-    const unsigned short *uniStr)	/* Unicode string to find length of. */
-{
-    Tcl_Size len = 0;
-
-    while (*uniStr != '\0') {
-	len++;
-	uniStr++;
-    }
-    return len;
-}
-
-/*
- *----------------------------------------------------------------------
- *
  * Tcl_UniCharLen --
  *
  *	Find the length of a UniChar string. The str input must be null
@@ -1987,12 +1250,11 @@ Tcl_Char16Len(
  *----------------------------------------------------------------------
  */
 
-#undef Tcl_UniCharLen
-Tcl_Size
+int
 Tcl_UniCharLen(
-    const int *uniStr)	/* Unicode string to find length of. */
+    CONST Tcl_UniChar *uniStr)	/* Unicode string to find length of. */
 {
-    Tcl_Size len = 0;
+    int len = 0;
 
     while (*uniStr != '\0') {
 	len++;
@@ -2006,8 +1268,8 @@ Tcl_UniCharLen(
  *
  * Tcl_UniCharNcmp --
  *
- *	Compare at most numChars chars (not bytes) of string ucs to string uct.
- *	Both ucs and uct are assumed to be at least numChars chars long.
+ *	Compare at most numChars unichars of string ucs to string uct.
+ *	Both ucs and uct are assumed to be at least numChars unichars long.
  *
  * Results:
  *	Return <0 if ucs < uct, 0 if ucs == uct, or >0 if ucs > uct.
@@ -2019,69 +1281,12 @@ Tcl_UniCharLen(
  */
 
 int
-TclUniCharNcmp(
-    const Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
-    const Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
-    size_t numChars)	/* Number of chars to compare. */
-{
-#if defined(WORDS_BIGENDIAN)
-    /*
-     * We are definitely on a big-endian machine; memcmp() is safe
-     */
-
-    return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar));
-
-#else /* !WORDS_BIGENDIAN */
-    /*
-     * We can't simply call memcmp() because that is not lexically correct.
-     */
-
-    for ( ; numChars != 0; ucs++, uct++, numChars--) {
-	if (*ucs != *uct) {
-	    return (*ucs - *uct);
-	}
-    }
-    return 0;
-#endif /* WORDS_BIGENDIAN */
-}
-
-int
-TclUniCharNmemcmp(
-    const void *ucsPtr,	/* Unicode string to compare to uct. */
-    const void *uctPtr,	/* Unicode string ucs is compared to. */
-    size_t numChars)	/* Number of chars (not bytes) to compare. */
-{
-    const Tcl_UniChar *ucs = (const Tcl_UniChar *)ucsPtr;
-    const Tcl_UniChar *uct = (const Tcl_UniChar *)uctPtr;
-#if defined(WORDS_BIGENDIAN)
-    /*
-     * We are definitely on a big-endian machine; memcmp() is safe
-     */
-
-    return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar));
-
-#else /* !WORDS_BIGENDIAN */
-    /*
-     * We can't simply call memcmp() because that is not lexically correct.
-     */
-
-    for ( ; numChars != 0; ucs++, uct++, numChars--) {
-	if (*ucs != *uct) {
-	    return (*ucs - *uct);
-	}
-    }
-    return 0;
-#endif /* WORDS_BIGENDIAN */
-}
-
-#if !defined(TCL_NO_DEPRECATED)
-int
 Tcl_UniCharNcmp(
-    const unsigned short *ucs,	/* Unicode string to compare to uct. */
-    const unsigned short *uct,	/* Unicode string ucs is compared to. */
-    unsigned long numChars)	/* Number of chars (not bytes) to compare. */
+    CONST Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
+    CONST Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
+    unsigned long numChars)	/* Number of unichars to compare. */
 {
-#if defined(WORDS_BIGENDIAN)
+#ifdef WORDS_BIGENDIAN
     /*
      * We are definitely on a big-endian machine; memcmp() is safe
      */
@@ -2095,27 +1300,21 @@ Tcl_UniCharNcmp(
 
     for ( ; numChars != 0; ucs++, uct++, numChars--) {
 	if (*ucs != *uct) {
-	    /* special case for handling upper surrogates */
-	    if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) {
-		return 1;
-	    } else if (((*uct & 0xFC00) == 0xD800)) {
-		return -1;
-	    }
 	    return (*ucs - *uct);
 	}
     }
     return 0;
 #endif /* WORDS_BIGENDIAN */
 }
-#endif
+
 /*
  *----------------------------------------------------------------------
  *
  * Tcl_UniCharNcasecmp --
  *
- *	Compare at most numChars chars (not bytes) of string ucs to string uct case
+ *	Compare at most numChars unichars of string ucs to string uct case
  *	insensitive. Both ucs and uct are assumed to be at least numChars
- *	chars long.
+ *	unichars long.
  *
  * Results:
  *	Return <0 if ucs < uct, 0 if ucs == uct, or >0 if ucs > uct.
@@ -2127,32 +1326,11 @@ Tcl_UniCharNcmp(
  */
 
 int
-TclUniCharNcasecmp(
-    const Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
-    const Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
-    size_t numChars)	/* Number of chars to compare. */
-{
-    for ( ; numChars != 0; numChars--, ucs++, uct++) {
-	if (*ucs != *uct) {
-	    Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs);
-	    Tcl_UniChar lct = Tcl_UniCharToLower(*uct);
-
-	    if (lcs != lct) {
-		return (lcs - lct);
-	    }
-	}
-    }
-    return 0;
-}
-
-int
-TclUniCharNcasememcmp(
-    const void *ucsPtr,	/* Unicode string to compare to uct. */
-    const void *uctPtr,	/* Unicode string ucs is compared to. */
-    size_t numChars)	/* Number of chars (not bytes) to compare. */
+Tcl_UniCharNcasecmp(
+    CONST Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
+    CONST Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
+    unsigned long numChars)	/* Number of unichars to compare. */
 {
-    const Tcl_UniChar *ucs = (const Tcl_UniChar *)ucsPtr;
-    const Tcl_UniChar *uct = (const Tcl_UniChar *)uctPtr;
     for ( ; numChars != 0; numChars--, ucs++, uct++) {
 	if (*ucs != *uct) {
 	    Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs);
@@ -2165,33 +1343,6 @@ TclUniCharNcasememcmp(
     }
     return 0;
 }
-
-#if !defined(TCL_NO_DEPRECATED)
-int
-Tcl_UniCharNcasecmp(
-    const unsigned short *ucs,	/* Unicode string to compare to uct. */
-    const unsigned short *uct,	/* Unicode string ucs is compared to. */
-    unsigned long numChars)	/* Number of chars (not bytes) to compare. */
-{
-    for ( ; numChars != 0; numChars--, ucs++, uct++) {
-	if (*ucs != *uct) {
-	    unsigned short lcs = Tcl_UniCharToLower(*ucs);
-	    unsigned short lct = Tcl_UniCharToLower(*uct);
-
-	    if (lcs != lct) {
-	    /* special case for handling upper surrogates */
-	    if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) {
-		return 1;
-	    } else if (((lct & 0xFC00) == 0xD800)) {
-		return -1;
-	    }
-		return (lcs - lct);
-	    }
-	}
-    }
-    return 0;
-}
-#endif
 
 /*
  *----------------------------------------------------------------------
@@ -2213,9 +1364,6 @@ int
 Tcl_UniCharIsAlnum(
     int ch)			/* Unicode character to test. */
 {
-    if (UNICODE_OUT_OF_RANGE(ch)) {
-	return 0;
-    }
     return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1);
 }
 
@@ -2239,9 +1387,6 @@ int
 Tcl_UniCharIsAlpha(
     int ch)			/* Unicode character to test. */
 {
-    if (UNICODE_OUT_OF_RANGE(ch)) {
-	return 0;
-    }
     return ((ALPHA_BITS >> GetCategory(ch)) & 1);
 }
 
@@ -2265,11 +1410,6 @@ int
 Tcl_UniCharIsControl(
     int ch)			/* Unicode character to test. */
 {
-    if (UNICODE_OUT_OF_RANGE(ch)) {
-	/* Clear away extension bits, if any */
-	ch &= 0x1FFFFF;
-	return ((ch == 0xE0001) || ((unsigned)(ch - 0xE0020) <= 0x5F));
-    }
     return ((CONTROL_BITS >> GetCategory(ch)) & 1);
 }
 
@@ -2293,9 +1433,6 @@ int
 Tcl_UniCharIsDigit(
     int ch)			/* Unicode character to test. */
 {
-    if (UNICODE_OUT_OF_RANGE(ch)) {
-	return 0;
-    }
     return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER);
 }
 
@@ -2319,9 +1456,6 @@ int
 Tcl_UniCharIsGraph(
     int ch)			/* Unicode character to test. */
 {
-    if (UNICODE_OUT_OF_RANGE(ch)) {
-	return ((unsigned)((ch & 0x1FFFFF) - 0xE0100) <= 0xEF);
-    }
     return ((GRAPH_BITS >> GetCategory(ch)) & 1);
 }
 
@@ -2345,9 +1479,6 @@ int
 Tcl_UniCharIsLower(
     int ch)			/* Unicode character to test. */
 {
-    if (UNICODE_OUT_OF_RANGE(ch)) {
-	return 0;
-    }
     return (GetCategory(ch) == LOWERCASE_LETTER);
 }
 
@@ -2371,9 +1502,6 @@ int
 Tcl_UniCharIsPrint(
     int ch)			/* Unicode character to test. */
 {
-    if (UNICODE_OUT_OF_RANGE(ch)) {
-	return ((unsigned)((ch & 0x1FFFFF) - 0xE0100) <= 0xEF);
-    }
     return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1);
 }
 
@@ -2397,9 +1525,6 @@ int
 Tcl_UniCharIsPunct(
     int ch)			/* Unicode character to test. */
 {
-    if (UNICODE_OUT_OF_RANGE(ch)) {
-	return 0;
-    }
     return ((PUNCT_BITS >> GetCategory(ch)) & 1);
 }
 
@@ -2423,20 +1548,14 @@ int
 Tcl_UniCharIsSpace(
     int ch)			/* Unicode character to test. */
 {
-    /* Ignore upper 11 bits. */
-    ch &= 0x1FFFFF;
-
     /*
      * If the character is within the first 127 characters, just use the
      * standard C function, otherwise consult the Unicode table.
      */
 
-    if (ch < 0x80) {
-	return TclIsSpaceProcM((char) ch);
-    } else if (UNICODE_OUT_OF_RANGE(ch)) {
-	return 0;
-    } else if (ch == 0x0085 || ch == 0x180E || ch == 0x200B
-	    || ch == 0x202F || ch == 0x2060 || ch == 0xFEFF) {
+    if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) {
+	return TclIsSpaceProc((char) ch);
+    } else if ((Tcl_UniChar) ch == 0x180e || (Tcl_UniChar) ch == 0x202f) {
 	return 1;
     } else {
 	return ((SPACE_BITS >> GetCategory(ch)) & 1);
@@ -2463,9 +1582,6 @@ int
 Tcl_UniCharIsUpper(
     int ch)			/* Unicode character to test. */
 {
-    if (UNICODE_OUT_OF_RANGE(ch)) {
-	return 0;
-    }
     return (GetCategory(ch) == UPPERCASE_LETTER);
 }
 
@@ -2489,9 +1605,6 @@ int
 Tcl_UniCharIsWordChar(
     int ch)			/* Unicode character to test. */
 {
-    if (UNICODE_OUT_OF_RANGE(ch)) {
-	return 0;
-    }
     return ((WORD_BITS >> GetCategory(ch)) & 1);
 }
 
@@ -2519,182 +1632,14 @@ Tcl_UniCharIsWordChar(
  */
 
 int
-TclUniCharCaseMatch(
-    const Tcl_UniChar *uniStr,	/* Unicode String. */
-    const Tcl_UniChar *uniPattern,
-				/* Pattern, which may contain special
-				 * characters. */
-    int nocase)			/* 0 for case sensitive, 1 for insensitive */
-{
-    Tcl_UniChar ch1 = 0, p;
-
-    while (1) {
-	p = *uniPattern;
-
-	/*
-	 * See if we're at the end of both the pattern and the string. If so,
-	 * we succeeded. If we're at the end of the pattern but not at the end
-	 * of the string, we failed.
-	 */
-
-	if (p == 0) {
-	    return (*uniStr == 0);
-	}
-	if ((*uniStr == 0) && (p != '*')) {
-	    return 0;
-	}
-
-	/*
-	 * Check for a "*" as the next pattern character. It matches any
-	 * substring. We handle this by skipping all the characters up to the
-	 * next matching one in the pattern, and then calling ourselves
-	 * recursively for each postfix of string, until either we match or we
-	 * reach the end of the string.
-	 */
-
-	if (p == '*') {
-	    /*
-	     * Skip all successive *'s in the pattern
-	     */
-
-	    while (*(++uniPattern) == '*') {
-		/* empty body */
-	    }
-	    p = *uniPattern;
-	    if (p == 0) {
-		return 1;
-	    }
-	    if (nocase) {
-		p = Tcl_UniCharToLower(p);
-	    }
-	    while (1) {
-		/*
-		 * Optimization for matching - cruise through the string
-		 * quickly if the next char in the pattern isn't a special
-		 * character
-		 */
-
-		if ((p != '[') && (p != '?') && (p != '\\')) {
-		    if (nocase) {
-			while (*uniStr && (p != *uniStr)
-				&& (p != Tcl_UniCharToLower(*uniStr))) {
-			    uniStr++;
-			}
-		    } else {
-			while (*uniStr && (p != *uniStr)) {
-			    uniStr++;
-			}
-		    }
-		}
-		if (TclUniCharCaseMatch(uniStr, uniPattern, nocase)) {
-		    return 1;
-		}
-		if (*uniStr == 0) {
-		    return 0;
-		}
-		uniStr++;
-	    }
-	}
-
-	/*
-	 * Check for a "?" as the next pattern character. It matches any
-	 * single character.
-	 */
-
-	if (p == '?') {
-	    uniPattern++;
-	    uniStr++;
-	    continue;
-	}
-
-	/*
-	 * Check for a "[" as the next pattern character. It is followed by a
-	 * list of characters that are acceptable, or by a range (two
-	 * characters separated by "-").
-	 */
-
-	if (p == '[') {
-	    Tcl_UniChar startChar, endChar;
-
-	    uniPattern++;
-	    ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr);
-	    uniStr++;
-	    while (1) {
-		if ((*uniPattern == ']') || (*uniPattern == 0)) {
-		    return 0;
-		}
-		startChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
-			: *uniPattern);
-		uniPattern++;
-		if (*uniPattern == '-') {
-		    uniPattern++;
-		    if (*uniPattern == 0) {
-			return 0;
-		    }
-		    endChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
-			    : *uniPattern);
-		    uniPattern++;
-		    if (((startChar <= ch1) && (ch1 <= endChar))
-			    || ((endChar <= ch1) && (ch1 <= startChar))) {
-			/*
-			 * Matches ranges of form [a-z] or [z-a].
-			 */
-			break;
-		    }
-		} else if (startChar == ch1) {
-		    break;
-		}
-	    }
-	    while (*uniPattern != ']') {
-		if (*uniPattern == 0) {
-		    uniPattern--;
-		    break;
-		}
-		uniPattern++;
-	    }
-	    uniPattern++;
-	    continue;
-	}
-
-	/*
-	 * If the next pattern character is '\', just strip off the '\' so we
-	 * do exact matching on the character that follows.
-	 */
-
-	if (p == '\\') {
-	    if (*(++uniPattern) == '\0') {
-		return 0;
-	    }
-	}
-
-	/*
-	 * There's no special character. Just make sure that the next bytes of
-	 * each string match.
-	 */
-
-	if (nocase) {
-	    if (Tcl_UniCharToLower(*uniStr) !=
-		    Tcl_UniCharToLower(*uniPattern)) {
-		return 0;
-	    }
-	} else if (*uniStr != *uniPattern) {
-	    return 0;
-	}
-	uniStr++;
-	uniPattern++;
-    }
-}
-
-#if !defined(TCL_NO_DEPRECATED)
-int
 Tcl_UniCharCaseMatch(
-    const unsigned short *uniStr,	/* Unicode String. */
-    const unsigned short *uniPattern,
+    CONST Tcl_UniChar *uniStr,	/* Unicode String. */
+    CONST Tcl_UniChar *uniPattern,
 				/* Pattern, which may contain special
 				 * characters. */
     int nocase)			/* 0 for case sensitive, 1 for insensitive */
 {
-    unsigned short ch1 = 0, p;
+    Tcl_UniChar ch1, p;
 
     while (1) {
 	p = *uniPattern;
@@ -2782,7 +1727,7 @@ Tcl_UniCharCaseMatch(
 	 */
 
 	if (p == '[') {
-	    unsigned short startChar, endChar;
+	    Tcl_UniChar startChar, endChar;
 
 	    uniPattern++;
 	    ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr);
@@ -2852,7 +1797,6 @@ Tcl_UniCharCaseMatch(
 	uniPattern++;
     }
 }
-#endif
 
 /*
  *----------------------------------------------------------------------
@@ -2877,14 +1821,14 @@ Tcl_UniCharCaseMatch(
 
 int
 TclUniCharMatch(
-    const Tcl_UniChar *string,	/* Unicode String. */
-    Tcl_Size strLen,		/* Length of String */
-    const Tcl_UniChar *pattern,	/* Pattern, which may contain special
+    CONST Tcl_UniChar *string,	/* Unicode String. */
+    int strLen,			/* Length of String */
+    CONST Tcl_UniChar *pattern,	/* Pattern, which may contain special
 				 * characters. */
-    Tcl_Size ptnLen,		/* Length of Pattern */
+    int ptnLen,			/* Length of Pattern */
     int nocase)			/* 0 for case sensitive, 1 for insensitive */
 {
-    const Tcl_UniChar *stringEnd, *patternEnd;
+    CONST Tcl_UniChar *stringEnd, *patternEnd;
     Tcl_UniChar p;
 
     stringEnd = string + strLen;