1 files changed, 64 insertions, 57 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 11bde5c..ac76309 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -119,7 +119,7 @@ static int		Invalid(const char *src);
  *---------------------------------------------------------------------------
  */
 
-int
+size_t
 TclUtfCount(
     int ch)			/* The Unicode character whose size is returned. */
 {
@@ -314,13 +314,13 @@ three:
 char *
 Tcl_UniCharToUtfDString(
     const int *uniStr,	/* Unicode string to convert to UTF-8. */
-    int uniLength,		/* Length of Unicode string. */
+    size_t uniLength,		/* Length of Unicode string. */
     Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
 				 * to this previously initialized DString. */
 {
     const int *w, *wEnd;
     char *p, *string;
-    int oldLength;
+    size_t oldLength;
 
     /*
      * UTF-8 string length in bytes will be <= Unicode string length * 4.
@@ -329,7 +329,7 @@ Tcl_UniCharToUtfDString(
     if (uniStr == NULL) {
 	return NULL;
     }
-    if (uniLength < 0) {
+    if (uniLength == TCL_AUTO_LENGTH) {
 	uniLength = 0;
 	w = uniStr;
 	while (*w != '\0') {
@@ -355,13 +355,14 @@ Tcl_UniCharToUtfDString(
 char *
 Tcl_Char16ToUtfDString(
     const unsigned short *uniStr,/* Utf-16 string to convert to UTF-8. */
-    int uniLength,		/* Length of Utf-16 string. */
+    size_t uniLength,		/* Length of Utf-16 string. */
     Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
 				 * to this previously initialized DString. */
 {
     const unsigned short *w, *wEnd;
     char *p, *string;
-    int oldLength, len = 1;
+    size_t oldLength;
+    int len = 1;
 
     /*
      * UTF-8 string length in bytes will be <= Utf16 string length * 3.
@@ -370,7 +371,7 @@ Tcl_Char16ToUtfDString(
     if (uniStr == NULL) {
 	return NULL;
     }
-    if (uniLength < 0) {
+    if (uniLength == TCL_AUTO_LENGTH) {
 
 	uniLength = 0;
 	w = uniStr;
@@ -421,7 +422,7 @@ Tcl_Char16ToUtfDString(
  *	Tcl_UtfCharComplete() before calling this routine to ensure that
  *	enough bytes remain in the string.
  *
- *	If TCL_UTF_MAX <= 4, special handling of Surrogate pairs is done:
+ *	If TCL_UTF_MAX <= 3, special handling of Surrogate pairs is done:
  *	For any UTF-8 string containing a character outside of the BMP, the
  *	first call to this function will fill *chPtr with the high surrogate
  *	and generate a return value of 1. Calling Tcl_UtfToUniChar again
@@ -654,7 +655,7 @@ Tcl_UtfToChar16(
 int *
 Tcl_UtfToUniCharDString(
     const char *src,		/* UTF-8 string to convert to Unicode. */
-    int length,			/* Length of UTF-8 string in bytes, or -1 for
+    size_t length,		/* Byte length of src, or TCL_AUTO_LENGTH for
 				 * strlen(). */
     Tcl_DString *dsPtr)		/* Unicode representation of string is
 				 * appended to this previously initialized
@@ -662,7 +663,7 @@ Tcl_UtfToUniCharDString(
 {
     int ch = 0, *w, *wString;
     const char *p;
-    int oldLength;
+    size_t oldLength;
     /* Pointer to the end of string. Never read endPtr[0] */
     const char *endPtr = src + length;
     /* Pointer to last byte where optimization still can be used */
@@ -671,7 +672,7 @@ Tcl_UtfToUniCharDString(
     if (src == NULL) {
 	return NULL;
     }
-    if (length < 0) {
+    if (length == TCL_AUTO_LENGTH) {
 	length = strlen(src);
     }
 
@@ -711,7 +712,7 @@ Tcl_UtfToUniCharDString(
 unsigned short *
 Tcl_UtfToChar16DString(
     const char *src,		/* UTF-8 string to convert to Unicode. */
-    int length,			/* Length of UTF-8 string in bytes, or -1 for
+    size_t length,		/* Byte length of src, or TCL_AUTO_LENGTH for
 				 * strlen(). */
     Tcl_DString *dsPtr)		/* Unicode representation of string is
 				 * appended to this previously initialized
@@ -719,7 +720,7 @@ Tcl_UtfToChar16DString(
 {
     unsigned short ch = 0, *w, *wString;
     const char *p;
-    int oldLength;
+    size_t oldLength;
     /* Pointer to the end of string. Never read endPtr[0] */
     const char *endPtr = src + length;
     /* Pointer to last byte where optimization still can be used */
@@ -728,12 +729,12 @@ Tcl_UtfToChar16DString(
     if (src == NULL) {
 	return NULL;
     }
-    if (length < 0) {
+    if (length == TCL_AUTO_LENGTH) {
 	length = strlen(src);
     }
 
     /*
-     * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
+     * Unicode string length in WCHARs will be <= UTF-8 string length in
      * bytes.
      */
 
@@ -789,7 +790,7 @@ int
 Tcl_UtfCharComplete(
     const char *src,		/* String to check if first few bytes contain
 				 * a complete UTF-8 character. */
-    int length)			/* Length of above string in bytes. */
+    size_t length)			/* Length of above string in bytes. */
 {
     return length >= complete[UCHAR(*src)];
 }
@@ -812,18 +813,18 @@ Tcl_UtfCharComplete(
  *---------------------------------------------------------------------------
  */
 
-int
+size_t
 Tcl_NumUtfChars(
     const char *src,	/* The UTF-8 string to measure. */
-    int length)		/* The length of the string in bytes, or -1
-			 * for strlen(string). */
+    size_t length)	/* The length of the string in bytes, or
+			 * TCL_AUTO_LENGTH for strlen(src). */
 {
     Tcl_UniChar ch = 0;
-    int i = 0;
+    size_t i = 0;
 
-    if (length < 0) {
+    if (length == TCL_AUTO_LENGTH) {
 	/* string is NUL-terminated, so TclUtfToUniChar calls are safe. */
-	while ((*src != '\0') && (i < INT_MAX)) {
+	while (*src != '\0') {
 	    src += TclUtfToUniChar(src, &ch);
 	    i++;
 	}
@@ -966,7 +967,7 @@ const char *
 Tcl_UtfNext(
     const char *src)		/* The current location in the string. */
 {
-    int left;
+    size_t left;
     const char *next;
 
     if (((*src) & 0xC0) == 0x80) {
@@ -1140,15 +1141,15 @@ Tcl_UtfPrev(
 int
 Tcl_UniCharAtIndex(
     const char *src,	/* The UTF-8 string to dereference. */
-    int index)		/* The position of the desired character. */
+    size_t index)		/* The position of the desired character. */
 {
     Tcl_UniChar ch = 0;
     int i = 0;
 
-    if (index < 0) {
+    if (index == TCL_INDEX_NONE) {
 	return -1;
     }
-    while (index-- > 0) {
+    while (index--) {
 	i = TclUtfToUniChar(src, &ch);
 	src += i;
     }
@@ -1184,21 +1185,28 @@ Tcl_UniCharAtIndex(
 const char *
 Tcl_UtfAtIndex(
     const char *src,	/* The UTF-8 string. */
-    int index)		/* The position of the desired character. */
+    size_t index)		/* The position of the desired character. */
 {
     Tcl_UniChar ch = 0;
-    int len = 0;
+#if TCL_UTF_MAX <= 3
+    size_t len = 0;
+#endif
 
-    while (index-- > 0) {
-	len = TclUtfToUniChar(src, &ch);
-	src += len;
-    }
+    if (index != TCL_INDEX_NONE) {
+	while (index--) {
+#if TCL_UTF_MAX <= 3
+	    src += (len = TclUtfToUniChar(src, &ch));
+#else
+	    src += TclUtfToUniChar(src, &ch);
+#endif
+	}
 #if TCL_UTF_MAX <= 3
     if ((ch >= 0xD800) && (len < 3)) {
 	/* Index points at character following high Surrogate */
 	src += TclUtfToUniChar(src, &ch);
     }
 #endif
+    }
     return src;
 }
 
@@ -1228,7 +1236,7 @@ Tcl_UtfAtIndex(
  *---------------------------------------------------------------------------
  */
 
-int
+size_t
 Tcl_UtfBackslash(
     const char *src,		/* Points to the backslash character of a
 				 * backslash sequence. */
@@ -1238,8 +1246,7 @@ Tcl_UtfBackslash(
 				 * backslash sequence. */
 {
 #define LINE_LENGTH 128
-    int numRead;
-    int result;
+    size_t numRead, result;
 
     result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst);
     if (numRead == LINE_LENGTH) {
@@ -1279,7 +1286,7 @@ Tcl_UtfToUpper(
 {
     int ch, upChar;
     char *src, *dst;
-    int len;
+    size_t len;
 
     /*
      * Iterate over the string until we hit the terminating null.
@@ -1332,7 +1339,7 @@ Tcl_UtfToLower(
 {
     int ch, lowChar;
     char *src, *dst;
-    int len;
+    size_t len;
 
     /*
      * Iterate over the string until we hit the terminating null.
@@ -1386,7 +1393,7 @@ Tcl_UtfToTitle(
 {
     int ch, titleChar, lowChar;
     char *src, *dst;
-    int len;
+    size_t len;
 
     /*
      * Capitalize the first character and then lowercase the rest of the
@@ -1448,7 +1455,7 @@ int
 TclpUtfNcmp2(
     const char *cs,		/* UTF string to compare to ct. */
     const char *ct,		/* UTF string cs is compared to. */
-    unsigned long numBytes)	/* Number of *bytes* to compare. */
+    size_t numBytes)	/* Number of *bytes* to compare. */
 {
     /*
      * We can't simply call 'memcmp(cs, ct, numBytes);' because we need to
@@ -1495,7 +1502,7 @@ int
 Tcl_UtfNcmp(
     const char *cs,		/* UTF string to compare to ct. */
     const char *ct,		/* UTF string cs is compared to. */
-    unsigned long numChars)	/* Number of UTF chars to compare. */
+    size_t numChars)	/* Number of UTF chars to compare. */
 {
     Tcl_UniChar ch1 = 0, ch2 = 0;
 
@@ -1553,7 +1560,7 @@ int
 Tcl_UtfNcasecmp(
     const char *cs,		/* UTF string to compare to ct. */
     const char *ct,		/* UTF string cs is compared to. */
-    unsigned long numChars)	/* Number of UTF chars to compare. */
+    size_t numChars)	/* Number of UTF chars to compare. */
 {
     Tcl_UniChar ch1 = 0, ch2 = 0;
 
@@ -1788,7 +1795,7 @@ Tcl_UniCharToTitle(
 /*
  *----------------------------------------------------------------------
  *
- * Tcl_UniCharLen --
+ * TclUniCharLen --
  *
  *	Find the length of a UniChar string. The str input must be null
  *	terminated.
@@ -1802,11 +1809,11 @@ Tcl_UniCharToTitle(
  *----------------------------------------------------------------------
  */
 
-int
-Tcl_UniCharLen(
+size_t
+TclUniCharLen(
     const Tcl_UniChar *uniStr)	/* Unicode string to find length of. */
 {
-    int len = 0;
+    size_t len = 0;
 
     while (*uniStr != '\0') {
 	len++;
@@ -1818,7 +1825,7 @@ Tcl_UniCharLen(
 /*
  *----------------------------------------------------------------------
  *
- * Tcl_UniCharNcmp --
+ * TclUniCharNcmp --
  *
  *	Compare at most numChars unichars of string ucs to string uct.
  *	Both ucs and uct are assumed to be at least numChars unichars long.
@@ -1833,10 +1840,10 @@ Tcl_UniCharLen(
  */
 
 int
-Tcl_UniCharNcmp(
+TclUniCharNcmp(
     const Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
     const Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
-    unsigned long numChars)	/* Number of unichars to compare. */
+    size_t numChars)	/* Number of unichars to compare. */
 {
 #ifdef WORDS_BIGENDIAN
     /*
@@ -1862,7 +1869,7 @@ Tcl_UniCharNcmp(
 /*
  *----------------------------------------------------------------------
  *
- * Tcl_UniCharNcasecmp --
+ * TclUniCharNcasecmp --
  *
  *	Compare at most numChars unichars of string ucs to string uct case
  *	insensitive. Both ucs and uct are assumed to be at least numChars
@@ -1878,10 +1885,10 @@ Tcl_UniCharNcmp(
  */
 
 int
-Tcl_UniCharNcasecmp(
+TclUniCharNcasecmp(
     const Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */
     const Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */
-    unsigned long numChars)	/* Number of unichars to compare. */
+    size_t numChars)	/* Number of unichars to compare. */
 {
     for ( ; numChars != 0; numChars--, ucs++, uct++) {
 	if (*ucs != *uct) {
@@ -2207,7 +2214,7 @@ Tcl_UniCharIsWordChar(
 /*
  *----------------------------------------------------------------------
  *
- * Tcl_UniCharCaseMatch --
+ * TclUniCharCaseMatch --
  *
  *	See if a particular Unicode string matches a particular pattern.
  *	Allows case insensitivity. This is the Unicode equivalent of the char*
@@ -2228,7 +2235,7 @@ Tcl_UniCharIsWordChar(
  */
 
 int
-Tcl_UniCharCaseMatch(
+TclUniCharCaseMatch(
     const Tcl_UniChar *uniStr,	/* Unicode String. */
     const Tcl_UniChar *uniPattern,
 				/* Pattern, which may contain special
@@ -2295,7 +2302,7 @@ Tcl_UniCharCaseMatch(
 			}
 		    }
 		}
-		if (Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase)) {
+		if (TclUniCharCaseMatch(uniStr, uniPattern, nocase)) {
 		    return 1;
 		}
 		if (*uniStr == 0) {
@@ -2401,7 +2408,7 @@ Tcl_UniCharCaseMatch(
  *
  *	See if a particular Unicode string matches a particular pattern.
  *	Allows case insensitivity. This is the Unicode equivalent of the char*
- *	Tcl_StringCaseMatch. This variant of Tcl_UniCharCaseMatch uses counted
+ *	Tcl_StringCaseMatch. This variant of TclUniCharCaseMatch uses counted
  *	Strings, so embedded NULLs are allowed.
  *
  * Results:
@@ -2418,10 +2425,10 @@ Tcl_UniCharCaseMatch(
 int
 TclUniCharMatch(
     const Tcl_UniChar *string,	/* Unicode String. */
-    int strLen,			/* Length of String */
+    size_t strLen,			/* Length of String */
     const Tcl_UniChar *pattern,	/* Pattern, which may contain special
 				 * characters. */
-    int ptnLen,			/* Length of Pattern */
+    size_t ptnLen,			/* Length of Pattern */
     int nocase)			/* 0 for case sensitive, 1 for insensitive */
 {
     const Tcl_UniChar *stringEnd, *patternEnd;