diff options
Diffstat (limited to 'generic/tclUtf.c')
| -rw-r--r-- | generic/tclUtf.c | 1226 | 
1 files changed, 657 insertions, 569 deletions
| diff --git a/generic/tclUtf.c b/generic/tclUtf.c index b7a6277..b33bf6a 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -5,10 +5,8 @@   *   * Copyright (c) 1997-1998 Sun Microsystems, Inc.   * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: tclUtf.c,v 1.30.2.2 2003/10/08 14:21:20 dkf Exp $ + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES.   */  #include "tclInt.h" @@ -20,49 +18,48 @@  #include "tclUniData.c"  /* - * The following macros are used for fast character category tests.  The - * x_BITS values are shifted right by the category value to determine whether - * the given category is included in the set. - */  + * The following macros are used for fast character category tests. The x_BITS + * values are shifted right by the category value to determine whether the + * given category is included in the set. + */  #define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \ -    | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER)) +	| (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1<<OTHER_LETTER)) + +#define CONTROL_BITS ((1 << CONTROL) | (1 << FORMAT) | (1 << PRIVATE_USE))  #define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER)  #define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \ -    | (1 << PARAGRAPH_SEPARATOR)) - -#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION) +	| (1 << PARAGRAPH_SEPARATOR)) -#define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | \ -	    (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \ -	    (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \ -	    (1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \ -	    (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ -	    (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ -	    (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \ -	    (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \ -	    (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL)) +#define WORD_BITS (ALPHA_BITS | DIGIT_BITS | (1 << CONNECTOR_PUNCTUATION))  #define PUNCT_BITS ((1 << CONNECTOR_PUNCTUATION) | \ -	    (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ -	    (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ -	    (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION)) +	(1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ +	(1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ +	(1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION)) + +#define GRAPH_BITS (WORD_BITS | PUNCT_BITS | \ +	(1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \ +	(1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \ +	(1 << OTHER_NUMBER) | \ +	(1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \ +	(1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL))  /* - * Unicode characters less than this value are represented by themselves  - * in UTF-8 strings.  + * Unicode characters less than this value are represented by themselves in + * UTF-8 strings.   */  #define UNICODE_SELF	0x80  /* - * The following structures are used when mapping between Unicode (UCS-2) - * and UTF-8. + * The following structures are used when mapping between Unicode (UCS-2) and + * UTF-8.   */ -static CONST unsigned char totalBytes[256] = { +static const unsigned char totalBytes[256] = {      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -76,29 +73,13 @@ static CONST unsigned char totalBytes[256] = {  #else      1,1,1,1,1,1,1,1,  #endif -#if TCL_UTF_MAX > 4 -    5,5,5,5, -#else -    1,1,1,1, -#endif -#if TCL_UTF_MAX > 5 -    6,6,6,6 -#else -    1,1,1,1 -#endif +    1,1,1,1,1,1,1,1  }; - -/* - * Procedures used only in this module. - */ - -static int UtfCount _ANSI_ARGS_((int ch)); -  /*   *---------------------------------------------------------------------------   * - * UtfCount -- + * TclUtfCount --   *   *	Find the number of bytes in the Utf character "ch".   * @@ -110,30 +91,21 @@ static int UtfCount _ANSI_ARGS_((int ch));   *   *---------------------------------------------------------------------------   */ -  -INLINE static int -UtfCount(ch) -    int ch;			/* The Tcl_UniChar whose size is returned. */ + +int +TclUtfCount( +    int ch)			/* The Tcl_UniChar whose size is returned. */  { -    if ((ch > 0) && (ch < UNICODE_SELF)) { +    if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) {  	return 1;      }      if (ch <= 0x7FF) {  	return 2;      } -    if (ch <= 0xFFFF) { -	return 3; -    }  #if TCL_UTF_MAX > 3 -    if (ch <= 0x1FFFFF) { +    if (((unsigned)(ch - 0x10000) <= 0xfffff)) {  	return 4;      } -    if (ch <= 0x3FFFFFF) { -	return 5; -    } -    if (ch <= 0x7FFFFFFF) { -	return 6; -    }  #endif      return 3;  } @@ -144,73 +116,75 @@ UtfCount(ch)   * Tcl_UniCharToUtf --   *   *	Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the - *	provided buffer.  Equivalent to Plan 9 runetochar(). + *	provided buffer. Equivalent to Plan 9 runetochar().   *   * Results: - *	The return values is the number of bytes in the buffer that - *	were consumed.   + *	The return values is the number of bytes in the buffer that were + *	consumed.   *   * Side effects:   *	None.   *   *---------------------------------------------------------------------------   */ -  -INLINE int -Tcl_UniCharToUtf(ch, str) -    int ch;			/* The Tcl_UniChar to be stored in the + +int +Tcl_UniCharToUtf( +    int ch,			/* The Tcl_UniChar to be stored in the  				 * buffer. */ -    char *str;			/* Buffer in which the UTF-8 representation -				 * of the Tcl_UniChar is stored.  Buffer must -				 * be large enough to hold the UTF-8 character +    char *buf)			/* Buffer in which the UTF-8 representation of +				 * the Tcl_UniChar is stored. Buffer must be +				 * large enough to hold the UTF-8 character  				 * (at most TCL_UTF_MAX bytes). */  { -    if ((ch > 0) && (ch < UNICODE_SELF)) { -	str[0] = (char) ch; +    if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) { +	buf[0] = (char) ch;  	return 1;      } -    if (ch <= 0x7FF) { -	str[1] = (char) ((ch | 0x80) & 0xBF); -	str[0] = (char) ((ch >> 6) | 0xC0); -	return 2; -    } -    if (ch <= 0xFFFF) { -	three: -	str[2] = (char) ((ch | 0x80) & 0xBF); -	str[1] = (char) (((ch >> 6) | 0x80) & 0xBF); -	str[0] = (char) ((ch >> 12) | 0xE0); -	return 3; -    } +    if (ch >= 0) { +	if (ch <= 0x7FF) { +	    buf[1] = (char) ((ch | 0x80) & 0xBF); +	    buf[0] = (char) ((ch >> 6) | 0xC0); +	    return 2; +	} +	if (ch <= 0xFFFF) { +#if TCL_UTF_MAX == 4 +	    if ((ch & 0xF800) == 0xD800) { +		if (ch & 0x0400) { +		    /* Low surrogate */ +		    buf[3] = (char) ((ch | 0x80) & 0xBF); +		    buf[2] |= (char) (((ch >> 6) | 0x80) & 0x8F); +		    return 4; +		} else { +		    /* High surrogate */ +		    ch += 0x40; +		    buf[2] = (char) (((ch << 4) | 0x80) & 0xB0); +		    buf[1] = (char) (((ch >> 2) | 0x80) & 0xBF); +		    buf[0] = (char) (((ch >> 8) | 0xF0) & 0xF7); +		    return 0; +		} +	    } +#endif +	    goto three; +	}  #if TCL_UTF_MAX > 3 -    if (ch <= 0x1FFFFF) { -	str[3] = (char) ((ch | 0x80) & 0xBF); -	str[2] = (char) (((ch >> 6) | 0x80) & 0xBF); -	str[1] = (char) (((ch >> 12) | 0x80) & 0xBF); -	str[0] = (char) ((ch >> 18) | 0xF0); -	return 4; -    } -    if (ch <= 0x3FFFFFF) { -	str[4] = (char) ((ch | 0x80) & 0xBF); -	str[3] = (char) (((ch >> 6) | 0x80) & 0xBF); -	str[2] = (char) (((ch >> 12) | 0x80) & 0xBF); -	str[1] = (char) (((ch >> 18) | 0x80) & 0xBF); -	str[0] = (char) ((ch >> 24) | 0xF8); -	return 5; -    } -    if (ch <= 0x7FFFFFFF) { -	str[5] = (char) ((ch | 0x80) & 0xBF); -	str[4] = (char) (((ch >> 6) | 0x80) & 0xBF); -	str[3] = (char) (((ch >> 12) | 0x80) & 0xBF); -	str[2] = (char) (((ch >> 18) | 0x80) & 0xBF); -	str[1] = (char) (((ch >> 24) | 0x80) & 0xBF); -	str[0] = (char) ((ch >> 30) | 0xFC); -	return 6; -    } +	if (ch <= 0x10FFFF) { +	    buf[3] = (char) ((ch | 0x80) & 0xBF); +	    buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); +	    buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); +	    buf[0] = (char) ((ch >> 18) | 0xF0); +	    return 4; +	}  #endif +    }      ch = 0xFFFD; -    goto three; +three: +    buf[2] = (char) ((ch | 0x80) & 0xBF); +    buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); +    buf[0] = (char) ((ch >> 12) | 0xE0); +    return 3;  }  /* @@ -222,25 +196,24 @@ Tcl_UniCharToUtf(ch, str)   *   * Results:   *	The return value is a pointer to the UTF-8 representation of the - *	Unicode string.  Storage for the return value is appended to the - *	end of dsPtr. + *	Unicode string. Storage for the return value is appended to the end of + *	dsPtr.   *   * Side effects:   *	None.   *   *---------------------------------------------------------------------------   */ -  +  char * -Tcl_UniCharToUtfDString(wString, numChars, dsPtr) -    CONST Tcl_UniChar *wString;	/* Unicode string to convert to UTF-8. */ -    int numChars;		/* Length of Unicode string in Tcl_UniChars +Tcl_UniCharToUtfDString( +    const Tcl_UniChar *uniStr,	/* Unicode string to convert to UTF-8. */ +    int uniLength,		/* Length of Unicode string in Tcl_UniChars  				 * (must be >= 0). */ -    Tcl_DString *dsPtr;		/* UTF-8 representation of string is -				 * appended to this previously initialized -				 * DString. */ +    Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended +				 * to this previously initialized DString. */  { -    CONST Tcl_UniChar *w, *wEnd; +    const Tcl_UniChar *w, *wEnd;      char *p, *string;      int oldLength; @@ -250,12 +223,12 @@ Tcl_UniCharToUtfDString(wString, numChars, dsPtr)       */      oldLength = Tcl_DStringLength(dsPtr); -    Tcl_DStringSetLength(dsPtr, (oldLength + numChars + 1) * TCL_UTF_MAX); +    Tcl_DStringSetLength(dsPtr, (oldLength + uniLength + 1) * TCL_UTF_MAX);      string = Tcl_DStringValue(dsPtr) + oldLength;      p = string; -    wEnd = wString + numChars; -    for (w = wString; w < wEnd; ) { +    wEnd = uniStr + uniLength; +    for (w = uniStr; w < wEnd; ) {  	p += Tcl_UniCharToUtf(*w, p);  	w++;      } @@ -269,16 +242,16 @@ Tcl_UniCharToUtfDString(wString, numChars, dsPtr)   *   * Tcl_UtfToUniChar --   * - *	Extract the Tcl_UniChar represented by the UTF-8 string.  Bad - *	UTF-8 sequences are converted to valid Tcl_UniChars and processing - *	continues.  Equivalent to Plan 9 chartorune(). + *	Extract the Tcl_UniChar represented by the UTF-8 string. Bad UTF-8 + *	sequences are converted to valid Tcl_UniChars and processing + *	continues. Equivalent to Plan 9 chartorune().   * - *	The caller must ensure that the source buffer is long enough that - *	this routine does not run off the end and dereference non-existent - *	memory looking for trail bytes.  If the source buffer is known to - *	be '\0' terminated, this cannot happen.  Otherwise, the caller - *	should call Tcl_UtfCharComplete() before calling this routine to - *	ensure that enough bytes remain in the string. + *	The caller must ensure that the source buffer is long enough that this + *	routine does not run off the end and dereference non-existent memory + *	looking for trail bytes. If the source buffer is known to be '\0' + *	terminated, this cannot happen. Otherwise, the caller should call + *	Tcl_UtfCharComplete() before calling this routine to ensure that + *	enough bytes remain in the string.   *   * Results:   *	*chPtr is filled with the Tcl_UniChar, and the return value is the @@ -289,20 +262,20 @@ Tcl_UniCharToUtfDString(wString, numChars, dsPtr)   *   *---------------------------------------------------------------------------   */ -  +  int -Tcl_UtfToUniChar(str, chPtr) -    register CONST char *str;	 /* The UTF-8 string. */ -    register Tcl_UniChar *chPtr; /* Filled with the Tcl_UniChar represented -				  * by the UTF-8 string. */ +Tcl_UtfToUniChar( +    register const char *src,	/* The UTF-8 string. */ +    register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by +				 * the UTF-8 string. */  {      register int byte; -     +      /*       * Unroll 1 to 3 byte UTF-8 sequences, use loop to handle longer ones.       */ -    byte = *((unsigned char *) str); +    byte = *((unsigned char *) src);      if (byte < 0xC0) {  	/*  	 * Handles properly formed UTF-8 characters between 0x01 and 0x7F. @@ -313,60 +286,51 @@ Tcl_UtfToUniChar(str, chPtr)  	*chPtr = (Tcl_UniChar) byte;  	return 1;      } else if (byte < 0xE0) { -	if ((str[1] & 0xC0) == 0x80) { +	if ((src[1] & 0xC0) == 0x80) {  	    /*  	     * Two-byte-character lead-byte followed by a trail-byte.  	     */ -	    *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (str[1] & 0x3F)); +	    *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F));  	    return 2;  	} +  	/*  	 * A two-byte-character lead-byte not followed by trail-byte  	 * represents itself.  	 */ - -	*chPtr = (Tcl_UniChar) byte; -	return 1;      } else if (byte < 0xF0) { -	if (((str[1] & 0xC0) == 0x80) && ((str[2] & 0xC0) == 0x80)) { +	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {  	    /*  	     * Three-byte-character lead byte followed by two trail bytes.  	     */ -	    *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)  -		    | ((str[1] & 0x3F) << 6) | (str[2] & 0x3F)); +	    *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12) +		    | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));  	    return 3;  	} +  	/*  	 * A three-byte-character lead-byte not followed by two trail-bytes  	 * represents itself.  	 */ - -	*chPtr = (Tcl_UniChar) byte; -	return 1;      }  #if TCL_UTF_MAX > 3 -    else { -	int ch, total, trail; - -	total = totalBytes[byte]; -	trail = total - 1; -	if (trail > 0) { -	    ch = byte & (0x3F >> trail); -	    do { -		str++; -		if ((*str & 0xC0) != 0x80) { -		    *chPtr = byte; -		    return 1; -		} -		ch <<= 6; -		ch |= (*str & 0x3F); -		trail--; -	    } while (trail > 0); -	    *chPtr = ch; -	    return total; +    else if (byte < 0xF8) { +	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { +	    /* +	     * Four-byte-character lead byte followed by three trail bytes. +	     */ + +	    *chPtr = (Tcl_UniChar) (((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12) +		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); +	    return 4;  	} + +	/* +	 * A three-byte-character lead-byte not followed by two trail-bytes +	 * represents itself. +	 */      }  #endif @@ -383,9 +347,8 @@ Tcl_UtfToUniChar(str, chPtr)   *   * Results:   *	The return value is a pointer to the Unicode representation of the - *	UTF-8 string.  Storage for the return value is appended to the - *	end of dsPtr.  The Unicode string is terminated with a Unicode - *	NULL character. + *	UTF-8 string. Storage for the return value is appended to the end of + *	dsPtr. The Unicode string is terminated with a Unicode NULL character.   *   * Side effects:   *	None. @@ -394,35 +357,36 @@ Tcl_UtfToUniChar(str, chPtr)   */  Tcl_UniChar * -Tcl_UtfToUniCharDString(string, length, dsPtr) -    CONST char *string;		/* UTF-8 string to convert to Unicode. */ -    int length;			/* Length of UTF-8 string in bytes, or -1 -				 * for strlen(). */ -    Tcl_DString *dsPtr;		/* Unicode representation of string is +Tcl_UtfToUniCharDString( +    const char *src,		/* UTF-8 string to convert to Unicode. */ +    int length,			/* Length of UTF-8 string in bytes, or -1 for +				 * strlen(). */ +    Tcl_DString *dsPtr)		/* Unicode representation of string is  				 * appended to this previously initialized  				 * DString. */  {      Tcl_UniChar *w, *wString; -    CONST char *p, *end; +    const char *p, *end;      int oldLength;      if (length < 0) { -	length = strlen(string); +	length = strlen(src);      }      /* -     * Unicode string length in Tcl_UniChars will be <= UTF-8 string length -     * in bytes. +     * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in +     * bytes.       */      oldLength = Tcl_DStringLength(dsPtr); +/* TODO: fix overreach! */      Tcl_DStringSetLength(dsPtr,  	    (int) ((oldLength + length + 1) * sizeof(Tcl_UniChar)));      wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);      w = wString; -    end = string + length; -    for (p = string; p < end; ) { +    end = src + length; +    for (p = src; p < end; ) {  	p += TclUtfToUniChar(p, w);  	w++;      } @@ -438,9 +402,9 @@ Tcl_UtfToUniCharDString(string, length, dsPtr)   *   * Tcl_UtfCharComplete --   * - *	Determine if the UTF-8 string of the given length is long enough - *	to be decoded by Tcl_UtfToUniChar().  This does not ensure that the - *	UTF-8 string is properly formed.  Equivalent to Plan 9 fullrune(). + *	Determine if the UTF-8 string of the given length is long enough to be + *	decoded by Tcl_UtfToUniChar(). This does not ensure that the UTF-8 + *	string is properly formed. Equivalent to Plan 9 fullrune().   *   * Results:   *	The return value is 0 if the string is not long enough, non-zero @@ -453,15 +417,15 @@ Tcl_UtfToUniCharDString(string, length, dsPtr)   */  int -Tcl_UtfCharComplete(str, len) -    CONST char *str;		/* String to check if first few bytes -				 * contain a complete UTF-8 character. */ -    int len;			/* Length of above string in bytes. */ +Tcl_UtfCharComplete( +    const char *src,		/* String to check if first few bytes contain +				 * a complete UTF-8 character. */ +    int length)			/* Length of above string in bytes. */  {      int ch; -    ch = *((unsigned char *) str); -    return len >= totalBytes[ch]; +    ch = *((unsigned char *) src); +    return length >= totalBytes[ch];  }  /* @@ -469,23 +433,23 @@ Tcl_UtfCharComplete(str, len)   *   * Tcl_NumUtfChars --   * - *	Returns the number of characters (not bytes) in the UTF-8 string, - *	not including the terminating NULL byte.  This is equivalent to - *	Plan 9 utflen() and utfnlen(). + *	Returns the number of characters (not bytes) in the UTF-8 string, not + *	including the terminating NULL byte. This is equivalent to Plan 9 + *	utflen() and utfnlen().   *   * Results: - *	As above.   + *	As above.   *   * Side effects:   *	None.   *   *---------------------------------------------------------------------------   */ -  -int  -Tcl_NumUtfChars(str, len) -    register CONST char *str;	/* The UTF-8 string to measure. */ -    int len;			/* The length of the string in bytes, or -1 + +int +Tcl_NumUtfChars( +    register const char *src,	/* The UTF-8 string to measure. */ +    int length)			/* The length of the string in bytes, or -1  				 * for strlen(string). */  {      Tcl_UniChar ch; @@ -495,27 +459,27 @@ Tcl_NumUtfChars(str, len)      /*       * The separate implementations are faster.       * -     * Since this is a time-sensitive function, we also do the check for -     * the single-byte char case specially. +     * Since this is a time-sensitive function, we also do the check for the +     * single-byte char case specially.       */      i = 0; -    if (len < 0) { -	while (*str != '\0') { -	    str += TclUtfToUniChar(str, chPtr); +    if (length < 0) { +	while (*src != '\0') { +	    src += TclUtfToUniChar(src, chPtr);  	    i++;  	}      } else {  	register int n; -	while (len > 0) { -	    if (UCHAR(*str) < 0xC0) { -		len--; -		str++; +	while (length > 0) { +	    if (UCHAR(*src) < 0xC0) { +		length--; +		src++;  	    } else { -		n = Tcl_UtfToUniChar(str, chPtr); -		len -= n; -		str += n; +		n = Tcl_UtfToUniChar(src, chPtr); +		length -= n; +		src += n;  	    }  	    i++;  	} @@ -528,37 +492,37 @@ Tcl_NumUtfChars(str, len)   *   * Tcl_UtfFindFirst --   * - *	Returns a pointer to the first occurance of the given Tcl_UniChar - *	in the NULL-terminated UTF-8 string.  The NULL terminator is - *	considered part of the UTF-8 string.  Equivalent to Plan 9 - *	utfrune(). + *	Returns a pointer to the first occurance of the given Tcl_UniChar in + *	the NULL-terminated UTF-8 string. The NULL terminator is considered + *	part of the UTF-8 string. Equivalent to Plan 9 utfrune().   *   * Results: - *	As above.  If the Tcl_UniChar does not exist in the given string, - *	the return value is NULL. + *	As above. If the Tcl_UniChar does not exist in the given string, the + *	return value is NULL.   *   * Side effects:   *	None.   *   *---------------------------------------------------------------------------   */ -CONST char * -Tcl_UtfFindFirst(string, ch) -    CONST char *string;		/* The UTF-8 string to be searched. */ -    int ch;			/* The Tcl_UniChar to search for. */ + +const char * +Tcl_UtfFindFirst( +    const char *src,		/* The UTF-8 string to be searched. */ +    int ch)			/* The Tcl_UniChar to search for. */  {      int len;      Tcl_UniChar find; -     +      while (1) { -	len = TclUtfToUniChar(string, &find); +	len = TclUtfToUniChar(src, &find);  	if (find == ch) { -	    return string; +	    return src;  	} -	if (*string == '\0') { +	if (*src == '\0') {  	    return NULL;  	} -	string += len; +	src += len;      }  } @@ -567,14 +531,13 @@ Tcl_UtfFindFirst(string, ch)   *   * Tcl_UtfFindLast --   * - *	Returns a pointer to the last occurance of the given Tcl_UniChar - *	in the NULL-terminated UTF-8 string.  The NULL terminator is - *	considered part of the UTF-8 string.  Equivalent to Plan 9 - *	utfrrune(). + *	Returns a pointer to the last occurance of the given Tcl_UniChar in + *	the NULL-terminated UTF-8 string. The NULL terminator is considered + *	part of the UTF-8 string. Equivalent to Plan 9 utfrrune().   *   * Results: - *	As above.  If the Tcl_UniChar does not exist in the given string, - *	the return value is NULL. + *	As above. If the Tcl_UniChar does not exist in the given string, the + *	return value is NULL.   *   * Side effects:   *	None. @@ -582,25 +545,25 @@ Tcl_UtfFindFirst(string, ch)   *---------------------------------------------------------------------------   */ -CONST char * -Tcl_UtfFindLast(string, ch) -    CONST char *string;		/* The UTF-8 string to be searched. */ -    int ch;			/* The Tcl_UniChar to search for. */ +const char * +Tcl_UtfFindLast( +    const char *src,		/* The UTF-8 string to be searched. */ +    int ch)			/* The Tcl_UniChar to search for. */  {      int len;      Tcl_UniChar find; -    CONST char *last; -	 +    const char *last; +      last = NULL;      while (1) { -	len = TclUtfToUniChar(string, &find); +	len = TclUtfToUniChar(src, &find);  	if (find == ch) { -	    last = string; +	    last = src;  	} -	if (*string == '\0') { +	if (*src == '\0') {  	    break;  	} -	string += len; +	src += len;      }      return last;  } @@ -610,28 +573,27 @@ Tcl_UtfFindLast(string, ch)   *   * Tcl_UtfNext --   * - *	Given a pointer to some current location in a UTF-8 string, - *	move forward one character.  The caller must ensure that they - *	are not asking for the next character after the last character - *	in the string. + *	Given a pointer to some current location in a UTF-8 string, move + *	forward one character. The caller must ensure that they are not asking + *	for the next character after the last character in the string.   *   * Results: - *	The return value is the pointer to the next character in - *	the UTF-8 string. + *	The return value is the pointer to the next character in the UTF-8 + *	string.   *   * Side effects:   *	None.   *   *---------------------------------------------------------------------------   */ -  -CONST char * -Tcl_UtfNext(str)  -    CONST char *str;		    /* The current location in the string. */ + +const char * +Tcl_UtfNext( +    const char *src)		/* The current location in the string. */  {      Tcl_UniChar ch; -    return str + TclUtfToUniChar(str, &ch); +    return src + TclUtfToUniChar(src, &ch);  }  /* @@ -639,15 +601,15 @@ Tcl_UtfNext(str)   *   * Tcl_UtfPrev --   * - *	Given a pointer to some current location in a UTF-8 string, - *	move backwards one character.  This works correctly when the - *	pointer is in the middle of a UTF-8 character. + *	Given a pointer to some current location in a UTF-8 string, move + *	backwards one character. This works correctly when the pointer is in + *	the middle of a UTF-8 character.   *   * Results: - *	The return value is a pointer to the previous character in the - *	UTF-8 string.  If the current location was already at the - *	beginning of the string, the return value will also be a - *	pointer to the beginning of the string. + *	The return value is a pointer to the previous character in the UTF-8 + *	string. If the current location was already at the beginning of the + *	string, the return value will also be a pointer to the beginning of + *	the string.   *   * Side effects:   *	None. @@ -655,22 +617,21 @@ Tcl_UtfNext(str)   *---------------------------------------------------------------------------   */ -CONST char * -Tcl_UtfPrev(str, start) -    CONST char *str;		    /* The current location in the string. */ -    CONST char *start;		    /* Pointer to the beginning of the -				     * string, to avoid going backwards too -				     * far. */ +const char * +Tcl_UtfPrev( +    const char *src,		/* The current location in the string. */ +    const char *start)		/* Pointer to the beginning of the string, to +				 * avoid going backwards too far. */  { -    CONST char *look; +    const char *look;      int i, byte; -     -    str--; -    look = str; + +    src--; +    look = src;      for (i = 0; i < TCL_UTF_MAX; i++) {  	if (look < start) { -	    if (str < start) { -		str = start; +	    if (src < start) { +		src = start;  	    }  	    break;  	} @@ -683,16 +644,16 @@ Tcl_UtfPrev(str, start)  	}  	look--;      } -    return str; +    return src;  } -	 +  /*   *---------------------------------------------------------------------------   *   * Tcl_UniCharAtIndex --   * - *	Returns the Unicode character represented at the specified - *	character (not byte) position in the UTF-8 string. + *	Returns the Unicode character represented at the specified character + *	(not byte) position in the UTF-8 string.   *   * Results:   *	As above. @@ -702,13 +663,13 @@ Tcl_UtfPrev(str, start)   *   *---------------------------------------------------------------------------   */ -  +  Tcl_UniChar -Tcl_UniCharAtIndex(src, index) -    register CONST char *src;	/* The UTF-8 string to dereference. */ -    register int index;		/* The position of the desired character. */ +Tcl_UniCharAtIndex( +    register const char *src,	/* The UTF-8 string to dereference. */ +    register int index)		/* The position of the desired character. */  { -    Tcl_UniChar ch; +    Tcl_UniChar ch = 0;      while (index >= 0) {  	index--; @@ -722,8 +683,8 @@ Tcl_UniCharAtIndex(src, index)   *   * Tcl_UtfAtIndex --   * - *	Returns a pointer to the specified character (not byte) position - *	in the UTF-8 string. + *	Returns a pointer to the specified character (not byte) position in + *	the UTF-8 string.   *   * Results:   *	As above. @@ -734,13 +695,13 @@ Tcl_UniCharAtIndex(src, index)   *---------------------------------------------------------------------------   */ -CONST char * -Tcl_UtfAtIndex(src, index) -    register CONST char *src;	/* The UTF-8 string. */ -    register int index;		/* The position of the desired character. */ +const char * +Tcl_UtfAtIndex( +    register const char *src,	/* The UTF-8 string. */ +    register int index)		/* The position of the desired character. */  {      Tcl_UniChar ch; -     +      while (index > 0) {  	index--;  	src += TclUtfToUniChar(src, &ch); @@ -757,31 +718,30 @@ Tcl_UtfAtIndex(src, index)   *   * Results:   *	Stores the bytes represented by the backslash sequence in dst and - *	returns the number of bytes written to dst.  At most TCL_UTF_MAX - *	bytes are written to dst; dst must have been large enough to accept - *	those bytes.  If readPtr isn't NULL then it is filled in with a - *	count of the number of bytes in the backslash sequence.   + *	returns the number of bytes written to dst. At most TCL_UTF_MAX bytes + *	are written to dst; dst must have been large enough to accept those + *	bytes. If readPtr isn't NULL then it is filled in with a count of the + *	number of bytes in the backslash sequence.   *   * Side effects: - *	The maximum number of bytes it takes to represent a Unicode - *	character in UTF-8 is guaranteed to be less than the number of - *	bytes used to express the backslash sequence that represents - *	that Unicode character.  If the target buffer into which the - *	caller is going to store the bytes that represent the Unicode - *	character is at least as large as the source buffer from which - *	the backslashed sequence was extracted, no buffer overruns should - *	occur. + *	The maximum number of bytes it takes to represent a Unicode character + *	in UTF-8 is guaranteed to be less than the number of bytes used to + *	express the backslash sequence that represents that Unicode character. + *	If the target buffer into which the caller is going to store the bytes + *	that represent the Unicode character is at least as large as the + *	source buffer from which the backslashed sequence was extracted, no + *	buffer overruns should occur.   *   *---------------------------------------------------------------------------   */  int -Tcl_UtfBackslash(src, readPtr, dst) -    CONST char *src;		/* Points to the backslash character of -				 * a backslash sequence. */ -    int *readPtr;		/* Fill in with number of characters read -				 * from src, unless NULL. */ -    char *dst;			/* Filled with the bytes represented by the +Tcl_UtfBackslash( +    const char *src,		/* Points to the backslash character of a +				 * backslash sequence. */ +    int *readPtr,		/* Fill in with number of characters read from +				 * src, unless NULL. */ +    char *dst)			/* Filled with the bytes represented by the  				 * backslash sequence. */  {  #define LINE_LENGTH 128 @@ -790,7 +750,10 @@ Tcl_UtfBackslash(src, readPtr, dst)      result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst);      if (numRead == LINE_LENGTH) { -	/* We ate a whole line.  Pay the price of a strlen() */ +	/* +	 * We ate a whole line. Pay the price of a strlen() +	 */ +  	result = TclParseBackslash(src, (int)strlen(src), &numRead, dst);      }      if (readPtr != NULL) { @@ -804,12 +767,12 @@ Tcl_UtfBackslash(src, readPtr, dst)   *   * Tcl_UtfToUpper --   * - *	Convert lowercase characters to uppercase characters in a UTF - *	string in place.  The conversion may shrink the UTF string. + *	Convert lowercase characters to uppercase characters in a UTF string + *	in place. The conversion may shrink the UTF string.   *   * Results: - *	Returns the number of bytes in the resulting string - *	excluding the trailing null. + *	Returns the number of bytes in the resulting string excluding the + *	trailing null.   *   * Side effects:   *	Writes a terminating null after the last converted character. @@ -818,8 +781,8 @@ Tcl_UtfBackslash(src, readPtr, dst)   */  int -Tcl_UtfToUpper(str) -    char *str;			/* String to convert in place. */ +Tcl_UtfToUpper( +    char *str)			/* String to convert in place. */  {      Tcl_UniChar ch, upChar;      char *src, *dst; @@ -831,16 +794,16 @@ Tcl_UtfToUpper(str)      src = dst = str;      while (*src) { -        bytes = TclUtfToUniChar(src, &ch); +	bytes = TclUtfToUniChar(src, &ch);  	upChar = Tcl_UniCharToUpper(ch);  	/* -	 * To keep badly formed Utf strings from getting inflated by -	 * the conversion (thereby causing a segfault), only copy the -	 * upper case char to dst if its size is <= the original char. +	 * To keep badly formed Utf strings from getting inflated by the +	 * conversion (thereby causing a segfault), only copy the upper case +	 * char to dst if its size is <= the original char.  	 */ -	 -	if (bytes < UtfCount(upChar)) { + +	if (bytes < TclUtfCount(upChar)) {  	    memcpy(dst, src, (size_t) bytes);  	    dst += bytes;  	} else { @@ -857,12 +820,12 @@ Tcl_UtfToUpper(str)   *   * Tcl_UtfToLower --   * - *	Convert uppercase characters to lowercase characters in a UTF - *	string in place.  The conversion may shrink the UTF string. + *	Convert uppercase characters to lowercase characters in a UTF string + *	in place. The conversion may shrink the UTF string.   *   * Results: - *	Returns the number of bytes in the resulting string - *	excluding the trailing null. + *	Returns the number of bytes in the resulting string excluding the + *	trailing null.   *   * Side effects:   *	Writes a terminating null after the last converted character. @@ -871,13 +834,13 @@ Tcl_UtfToUpper(str)   */  int -Tcl_UtfToLower(str) -    char *str;			/* String to convert in place. */ +Tcl_UtfToLower( +    char *str)			/* String to convert in place. */  {      Tcl_UniChar ch, lowChar;      char *src, *dst;      int bytes; -     +      /*       * Iterate over the string until we hit the terminating null.       */ @@ -888,12 +851,12 @@ Tcl_UtfToLower(str)  	lowChar = Tcl_UniCharToLower(ch);  	/* -	 * To keep badly formed Utf strings from getting inflated by -	 * the conversion (thereby causing a segfault), only copy the -	 * lower case char to dst if its size is <= the original char. +	 * To keep badly formed Utf strings from getting inflated by the +	 * conversion (thereby causing a segfault), only copy the lower case +	 * char to dst if its size is <= the original char.  	 */ -	 -	if (bytes < UtfCount(lowChar)) { + +	if (bytes < TclUtfCount(lowChar)) {  	    memcpy(dst, src, (size_t) bytes);  	    dst += bytes;  	} else { @@ -910,13 +873,13 @@ Tcl_UtfToLower(str)   *   * Tcl_UtfToTitle --   * - *	Changes the first character of a UTF string to title case or - *	uppercase and the rest of the string to lowercase.  The - *	conversion happens in place and may shrink the UTF string. + *	Changes the first character of a UTF string to title case or uppercase + *	and the rest of the string to lowercase. The conversion happens in + *	place and may shrink the UTF string.   *   * Results: - *	Returns the number of bytes in the resulting string - *	excluding the trailing null. + *	Returns the number of bytes in the resulting string excluding the + *	trailing null.   *   * Side effects:   *	Writes a terminating null after the last converted character. @@ -925,13 +888,13 @@ Tcl_UtfToLower(str)   */  int -Tcl_UtfToTitle(str) -    char *str;			/* String to convert in place. */ +Tcl_UtfToTitle( +    char *str)			/* String to convert in place. */  {      Tcl_UniChar ch, titleChar, lowChar;      char *src, *dst;      int bytes; -     +      /*       * Capitalize the first character and then lowercase the rest of the       * characters until we get to a null. @@ -943,7 +906,7 @@ Tcl_UtfToTitle(str)  	bytes = TclUtfToUniChar(src, &ch);  	titleChar = Tcl_UniCharToTitle(ch); -	if (bytes < UtfCount(titleChar)) { +	if (bytes < TclUtfCount(titleChar)) {  	    memcpy(dst, src, (size_t) bytes);  	    dst += bytes;  	} else { @@ -955,7 +918,7 @@ Tcl_UtfToTitle(str)  	bytes = TclUtfToUniChar(src, &ch);  	lowChar = Tcl_UniCharToLower(ch); -	if (bytes < UtfCount(lowChar)) { +	if (bytes < TclUtfCount(lowChar)) {  	    memcpy(dst, src, (size_t) bytes);  	    dst += bytes;  	} else { @@ -972,8 +935,8 @@ Tcl_UtfToTitle(str)   *   * TclpUtfNcmp2 --   * - *	Compare at most n bytes of utf-8 strings cs and ct.  Both cs - *	and ct are assumed to be at least n bytes long. + *	Compare at most numBytes bytes of utf-8 strings cs and ct. Both cs and + *	ct are assumed to be at least numBytes bytes long.   *   * Results:   *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. @@ -985,26 +948,28 @@ Tcl_UtfToTitle(str)   */  int -TclpUtfNcmp2(cs, ct, n) -    CONST char *cs;		/* UTF string to compare to ct. */ -    CONST char *ct;		/* UTF string cs is compared to. */ -    unsigned long n;		/* Number of *bytes* to compare. */ +TclpUtfNcmp2( +    const char *cs,		/* UTF string to compare to ct. */ +    const char *ct,		/* UTF string cs is compared to. */ +    unsigned long numBytes)	/* Number of *bytes* to compare. */  {      /* -     * We can't simply call 'memcmp(cs, ct, n);' because we need to check -     * for Tcl's \xC0\x80 non-utf-8 null encoding. -     * Otherwise utf-8 lexes fine in the strcmp manner. +     * We can't simply call 'memcmp(cs, ct, numBytes);' because we need to +     * check for Tcl's \xC0\x80 non-utf-8 null encoding. Otherwise utf-8 lexes +     * fine in the strcmp manner.       */ +      register int result = 0; -    for ( ; n != 0; n--, cs++, ct++) { +    for ( ; numBytes != 0; numBytes--, cs++, ct++) {  	if (*cs != *ct) {  	    result = UCHAR(*cs) - UCHAR(*ct);  	    break;  	}      } -    if (n && ((UCHAR(*cs) == 0xC0) || (UCHAR(*ct) == 0xC0))) { +    if (numBytes && ((UCHAR(*cs) == 0xC0) || (UCHAR(*ct) == 0xC0))) {  	unsigned char c1, c2; +  	c1 = ((UCHAR(*cs) == 0xC0) && (UCHAR(cs[1]) == 0x80)) ? 0 : UCHAR(*cs);  	c2 = ((UCHAR(*ct) == 0xC0) && (UCHAR(ct[1]) == 0x80)) ? 0 : UCHAR(*ct);  	result = (c1 - c2); @@ -1017,8 +982,8 @@ TclpUtfNcmp2(cs, ct, n)   *   * Tcl_UtfNcmp --   * - *	Compare at most n UTF chars of string cs to string ct.  Both cs - *	and ct are assumed to be at least n UTF chars long. + *	Compare at most numChars UTF chars of string cs to string ct. Both cs + *	and ct are assumed to be at least numChars UTF chars long.   *   * Results:   *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. @@ -1030,23 +995,26 @@ TclpUtfNcmp2(cs, ct, n)   */  int -Tcl_UtfNcmp(cs, ct, n) -    CONST char *cs;		/* UTF string to compare to ct. */ -    CONST char *ct;		/* UTF string cs is compared to. */ -    unsigned long n;		/* Number of UTF chars to compare. */ +Tcl_UtfNcmp( +    const char *cs,		/* UTF string to compare to ct. */ +    const char *ct,		/* UTF string cs is compared to. */ +    unsigned long numChars)	/* Number of UTF chars to compare. */  {      Tcl_UniChar ch1, ch2; +      /* -     * Cannot use 'memcmp(cs, ct, n);' as byte representation of -     * \u0000 (the pair of bytes 0xc0,0x80) is larger than byte -     * representation of \u0001 (the byte 0x01.) +     * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the +     * pair of bytes 0xc0,0x80) is larger than byte representation of \u0001 +     * (the byte 0x01.)       */ -    while (n-- > 0) { + +    while (numChars-- > 0) {  	/* -	 * n must be interpreted as chars, not bytes. -	 * This should be called only when both strings are of -	 * at least n chars long (no need for \0 check) +	 * n must be interpreted as chars, not bytes. This should be called +	 * only when both strings are of at least n chars long (no need for \0 +	 * check)  	 */ +  	cs += TclUtfToUniChar(cs, &ch1);  	ct += TclUtfToUniChar(ct, &ch2);  	if (ch1 != ch2) { @@ -1061,9 +1029,9 @@ Tcl_UtfNcmp(cs, ct, n)   *   * Tcl_UtfNcasecmp --   * - *	Compare at most n UTF chars of string cs to string ct case - *	insensitive.  Both cs and ct are assumed to be at least n - *	UTF chars long. + *	Compare at most numChars UTF chars of string cs to string ct case + *	insensitive. Both cs and ct are assumed to be at least numChars UTF + *	chars long.   *   * Results:   *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. @@ -1075,13 +1043,13 @@ Tcl_UtfNcmp(cs, ct, n)   */  int -Tcl_UtfNcasecmp(cs, ct, n) -    CONST char *cs;		/* UTF string to compare to ct. */ -    CONST char *ct;		/* UTF string cs is compared to. */ -    unsigned long n;			/* Number of UTF chars to compare. */ +Tcl_UtfNcasecmp( +    const char *cs,		/* UTF string to compare to ct. */ +    const char *ct,		/* UTF string cs is compared to. */ +    unsigned long numChars)	/* Number of UTF chars to compare. */  {      Tcl_UniChar ch1, ch2; -    while (n-- > 0) { +    while (numChars-- > 0) {  	/*  	 * n must be interpreted as chars, not bytes.  	 * This should be called only when both strings are of @@ -1103,6 +1071,46 @@ Tcl_UtfNcasecmp(cs, ct, n)  /*   *----------------------------------------------------------------------   * + * Tcl_UtfNcasecmp -- + * + *	Compare UTF chars of string cs to string ct case insensitively. + *	Replacement for strcasecmp in Tcl core, in places where UTF-8 should + *	be handled. + * + * Results: + *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. + * + * Side effects: + *	None. + * + *---------------------------------------------------------------------- + */ + +int +TclUtfCasecmp( +    const char *cs,		/* UTF string to compare to ct. */ +    const char *ct)		/* UTF string cs is compared to. */ +{ +    while (*cs && *ct) { +	Tcl_UniChar ch1, ch2; + +	cs += TclUtfToUniChar(cs, &ch1); +	ct += TclUtfToUniChar(ct, &ch2); +	if (ch1 != ch2) { +	    ch1 = Tcl_UniCharToLower(ch1); +	    ch2 = Tcl_UniCharToLower(ch2); +	    if (ch1 != ch2) { +		return ch1 - ch2; +	    } +	} +    } +    return UCHAR(*cs) - UCHAR(*ct); +} + + +/* + *---------------------------------------------------------------------- + *   * Tcl_UniCharToUpper --   *   *	Compute the uppercase equivalent of the given Unicode character. @@ -1117,16 +1125,15 @@ Tcl_UtfNcasecmp(cs, ct, n)   */  Tcl_UniChar -Tcl_UniCharToUpper(ch) -    int ch;			/* Unicode character to convert. */ +Tcl_UniCharToUpper( +    int ch)			/* Unicode character to convert. */  {      int info = GetUniCharInfo(ch);      if (GetCaseType(info) & 0x04) { -	return (Tcl_UniChar) (ch - GetDelta(info)); -    } else { -	return ch; +	ch -= GetDelta(info);      } +    return (Tcl_UniChar) ch;  }  /* @@ -1146,16 +1153,15 @@ Tcl_UniCharToUpper(ch)   */  Tcl_UniChar -Tcl_UniCharToLower(ch) -    int ch;			/* Unicode character to convert. */ +Tcl_UniCharToLower( +    int ch)			/* Unicode character to convert. */  {      int info = GetUniCharInfo(ch);      if (GetCaseType(info) & 0x02) { -	return (Tcl_UniChar) (ch + GetDelta(info)); -    } else { -	return ch; +	ch += GetDelta(info);      } +    return (Tcl_UniChar) ch;  }  /* @@ -1175,8 +1181,8 @@ Tcl_UniCharToLower(ch)   */  Tcl_UniChar -Tcl_UniCharToTitle(ch) -    int ch;			/* Unicode character to convert. */ +Tcl_UniCharToTitle( +    int ch)			/* Unicode character to convert. */  {      int info = GetUniCharInfo(ch);      int mode = GetCaseType(info); @@ -1186,12 +1192,11 @@ Tcl_UniCharToTitle(ch)  	 * Subtract or add one depending on the original case.  	 */ -	return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1)); +	ch += ((mode & 0x4) ? -1 : 1);      } else if (mode == 0x4) { -	return (Tcl_UniChar) (ch - GetDelta(info)); -    } else { -	return ch; +	ch -= GetDelta(info);      } +    return (Tcl_UniChar) ch;  }  /* @@ -1199,7 +1204,7 @@ Tcl_UniCharToTitle(ch)   *   * Tcl_UniCharLen --   * - *	Find the length of a UniChar string.  The str input must be null + *	Find the length of a UniChar string. The str input must be null   *	terminated.   *   * Results: @@ -1212,14 +1217,14 @@ Tcl_UniCharToTitle(ch)   */  int -Tcl_UniCharLen(str) -    CONST Tcl_UniChar *str;	/* Unicode string to find length of. */ +Tcl_UniCharLen( +    const Tcl_UniChar *uniStr)	/* Unicode string to find length of. */  {      int len = 0; -     -    while (*str != '\0') { + +    while (*uniStr != '\0') {  	len++; -	str++; +	uniStr++;      }      return len;  } @@ -1229,11 +1234,11 @@ Tcl_UniCharLen(str)   *   * Tcl_UniCharNcmp --   * - *	Compare at most n unichars of string cs to string ct.  Both cs - *	and ct are assumed to be at least n unichars long. + *	Compare at most numChars unichars of string ucs to string uct. + *	Both ucs and uct are assumed to be at least numChars unichars long.   *   * Results: - *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. + *	Return <0 if ucs < uct, 0 if ucs == uct, or >0 if ucs > uct.   *   * Side effects:   *	None. @@ -1242,24 +1247,26 @@ Tcl_UniCharLen(str)   */  int -Tcl_UniCharNcmp(cs, ct, n) -    CONST Tcl_UniChar *cs;		/* Unicode string to compare to ct. */ -    CONST Tcl_UniChar *ct;		/* Unicode string cs is compared to. */ -    unsigned long n;			/* Number of unichars to compare. */ +Tcl_UniCharNcmp( +    const Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */ +    const Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */ +    unsigned long numChars)	/* Number of unichars to compare. */  {  #ifdef WORDS_BIGENDIAN      /*       * We are definitely on a big-endian machine; memcmp() is safe       */ -    return memcmp(cs, ct, n*sizeof(Tcl_UniChar)); + +    return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar));  #else /* !WORDS_BIGENDIAN */      /*       * We can't simply call memcmp() because that is not lexically correct.       */ -    for ( ; n != 0; cs++, ct++, n--) { -	if (*cs != *ct) { -	    return (*cs - *ct); + +    for ( ; numChars != 0; ucs++, uct++, numChars--) { +	if (*ucs != *uct) { +	    return (*ucs - *uct);  	}      }      return 0; @@ -1271,12 +1278,12 @@ Tcl_UniCharNcmp(cs, ct, n)   *   * Tcl_UniCharNcasecmp --   * - *	Compare at most n unichars of string cs to string ct case - *	insensitive.  Both cs and ct are assumed to be at least n + *	Compare at most numChars unichars of string ucs to string uct case + *	insensitive. Both ucs and uct are assumed to be at least numChars   *	unichars long.   *   * Results: - *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. + *	Return <0 if ucs < uct, 0 if ucs == uct, or >0 if ucs > uct.   *   * Side effects:   *	None. @@ -1285,15 +1292,16 @@ Tcl_UniCharNcmp(cs, ct, n)   */  int -Tcl_UniCharNcasecmp(cs, ct, n) -    CONST Tcl_UniChar *cs;		/* Unicode string to compare to ct. */ -    CONST Tcl_UniChar *ct;		/* Unicode string cs is compared to. */ -    unsigned long n;			/* Number of unichars to compare. */ +Tcl_UniCharNcasecmp( +    const Tcl_UniChar *ucs,	/* Unicode string to compare to uct. */ +    const Tcl_UniChar *uct,	/* Unicode string ucs is compared to. */ +    unsigned long numChars)	/* Number of unichars to compare. */  { -    for ( ; n != 0; n--, cs++, ct++) { -	if (*cs != *ct) { -	    Tcl_UniChar lcs = Tcl_UniCharToLower(*cs); -	    Tcl_UniChar lct = Tcl_UniCharToLower(*ct); +    for ( ; numChars != 0; numChars--, ucs++, uct++) { +	if (*ucs != *uct) { +	    Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs); +	    Tcl_UniChar lct = Tcl_UniCharToLower(*uct); +  	    if (lcs != lct) {  		return (lcs - lct);  	    } @@ -1319,12 +1327,15 @@ Tcl_UniCharNcasecmp(cs, ct, n)   */  int -Tcl_UniCharIsAlnum(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsAlnum( +    int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - -    return (((ALPHA_BITS | DIGIT_BITS) >> category) & 1); +#if TCL_UTF_MAX > 3 +    if (UNICODE_OUT_OF_RANGE(ch)) { +	return 0; +    } +#endif +    return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1);  }  /* @@ -1344,11 +1355,15 @@ Tcl_UniCharIsAlnum(ch)   */  int -Tcl_UniCharIsAlpha(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsAlpha( +    int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); -    return ((ALPHA_BITS >> category) & 1); +#if TCL_UTF_MAX > 3 +    if (UNICODE_OUT_OF_RANGE(ch)) { +	return 0; +    } +#endif +    return ((ALPHA_BITS >> GetCategory(ch)) & 1);  }  /* @@ -1368,10 +1383,22 @@ Tcl_UniCharIsAlpha(ch)   */  int -Tcl_UniCharIsControl(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsControl( +    int ch)			/* Unicode character to test. */  { -    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL); +#if TCL_UTF_MAX > 3 +    if (UNICODE_OUT_OF_RANGE(ch)) { +	ch &= 0x1fffff; +	if ((ch == 0xe0001) || ((ch >= 0xe0020) && (ch <= 0xe007f))) { +	    return 1; +	} +	if ((ch >= 0xf0000) && ((ch & 0xffff) <= 0xfffd)) { +	    return 1; +	} +	return 0; +    } +#endif +    return ((CONTROL_BITS >> GetCategory(ch)) & 1);  }  /* @@ -1391,11 +1418,15 @@ Tcl_UniCharIsControl(ch)   */  int -Tcl_UniCharIsDigit(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsDigit( +    int ch)			/* Unicode character to test. */  { -    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) -	    == DECIMAL_DIGIT_NUMBER); +#if TCL_UTF_MAX > 3 +    if (UNICODE_OUT_OF_RANGE(ch)) { +	return 0; +    } +#endif +    return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER);  }  /* @@ -1415,11 +1446,16 @@ Tcl_UniCharIsDigit(ch)   */  int -Tcl_UniCharIsGraph(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsGraph( +    int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); -    return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' ')); +#if TCL_UTF_MAX > 3 +    if (UNICODE_OUT_OF_RANGE(ch)) { +	ch &= 0x1fffff; +	return (ch >= 0xe0100) && (ch <= 0xe01ef); +    } +#endif +    return ((GRAPH_BITS >> GetCategory(ch)) & 1);  }  /* @@ -1439,10 +1475,15 @@ Tcl_UniCharIsGraph(ch)   */  int -Tcl_UniCharIsLower(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsLower( +    int ch)			/* Unicode character to test. */  { -    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == LOWERCASE_LETTER); +#if TCL_UTF_MAX > 3 +    if (UNICODE_OUT_OF_RANGE(ch)) { +	return 0; +    } +#endif +    return (GetCategory(ch) == LOWERCASE_LETTER);  }  /* @@ -1462,11 +1503,16 @@ Tcl_UniCharIsLower(ch)   */  int -Tcl_UniCharIsPrint(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsPrint( +    int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); -    return ((PRINT_BITS >> category) & 1); +#if TCL_UTF_MAX > 3 +    if (UNICODE_OUT_OF_RANGE(ch)) { +	ch &= 0x1fffff; +	return (ch >= 0xe0100) && (ch <= 0xe01ef); +    } +#endif +    return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1);  }  /* @@ -1486,11 +1532,15 @@ Tcl_UniCharIsPrint(ch)   */  int -Tcl_UniCharIsPunct(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsPunct( +    int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); -    return ((PUNCT_BITS >> category) & 1); +#if TCL_UTF_MAX > 3 +    if (UNICODE_OUT_OF_RANGE(ch)) { +	return 0; +    } +#endif +    return ((PUNCT_BITS >> GetCategory(ch)) & 1);  }  /* @@ -1510,10 +1560,16 @@ Tcl_UniCharIsPunct(ch)   */  int -Tcl_UniCharIsSpace(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsSpace( +    int ch)			/* Unicode character to test. */  { -    register int category; +#if TCL_UTF_MAX > 3 +    /* Ignore upper 11 bits. */ +    ch &= 0x1fffff; +#else +    /* Ignore upper 16 bits. */ +    ch &= 0xffff; +#endif      /*       * If the character is within the first 127 characters, just use the @@ -1521,10 +1577,16 @@ Tcl_UniCharIsSpace(ch)       */      if (ch < 0x80) { -	return isspace(UCHAR(ch)); /* INTL: ISO space */ +	return TclIsSpaceProc((char) ch); +#if TCL_UTF_MAX > 3 +    } else if (UNICODE_OUT_OF_RANGE(ch)) { +	return 0; +#endif +    } else if (ch == 0x0085 || ch == 0x180e || ch == 0x200b +	    || ch == 0x202f || ch == 0x2060 || ch == 0xfeff) { +	return 1;      } else { -	category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); -	return ((SPACE_BITS >> category) & 1); +	return ((SPACE_BITS >> GetCategory(ch)) & 1);      }  } @@ -1545,10 +1607,15 @@ Tcl_UniCharIsSpace(ch)   */  int -Tcl_UniCharIsUpper(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsUpper( +    int ch)			/* Unicode character to test. */  { -    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == UPPERCASE_LETTER); +#if TCL_UTF_MAX > 3 +    if (UNICODE_OUT_OF_RANGE(ch)) { +	return 0; +    } +#endif +    return (GetCategory(ch) == UPPERCASE_LETTER);  }  /* @@ -1556,8 +1623,7 @@ Tcl_UniCharIsUpper(ch)   *   * Tcl_UniCharIsWordChar --   * - *	Test if a character is alphanumeric or a connector punctuation - *	mark. + *	Test if a character is alphanumeric or a connector punctuation mark.   *   * Results:   *	Returns 1 if character is a word character. @@ -1569,12 +1635,15 @@ Tcl_UniCharIsUpper(ch)   */  int -Tcl_UniCharIsWordChar(ch) -    int ch;			/* Unicode character to test. */ +Tcl_UniCharIsWordChar( +    int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - -    return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1); +#if TCL_UTF_MAX > 3 +    if (UNICODE_OUT_OF_RANGE(ch)) { +	return 0; +    } +#endif +    return ((WORD_BITS >> GetCategory(ch)) & 1);  }  /* @@ -1583,17 +1652,16 @@ Tcl_UniCharIsWordChar(ch)   * Tcl_UniCharCaseMatch --   *   *	See if a particular Unicode string matches a particular pattern. - *	Allows case insensitivity.  This is the Unicode equivalent of - *	the char* Tcl_StringCaseMatch.  The UniChar strings must be - *	NULL-terminated.  This has no provision for counted UniChar - *	strings, thus should not be used where NULLs are expected in the - *	UniChar string.  Use TclUniCharMatch where possible. + *	Allows case insensitivity. This is the Unicode equivalent of the char* + *	Tcl_StringCaseMatch. The UniChar strings must be NULL-terminated. + *	This has no provision for counted UniChar strings, thus should not be + *	used where NULLs are expected in the UniChar string. Use + *	TclUniCharMatch where possible.   *   * Results: - *	The return value is 1 if string matches pattern, and - *	0 otherwise.  The matching operation permits the following - *	special characters in the pattern: *?\[] (see the manual - *	entry for details on what these mean). + *	The return value is 1 if string matches pattern, and 0 otherwise. The + *	matching operation permits the following special characters in the + *	pattern: *?\[] (see the manual entry for details on what these mean).   *   * Side effects:   *	None. @@ -1602,44 +1670,48 @@ Tcl_UniCharIsWordChar(ch)   */  int -Tcl_UniCharCaseMatch(string, pattern, nocase) -    CONST Tcl_UniChar *string;	/* Unicode String. */ -    CONST Tcl_UniChar *pattern;	/* Pattern, which may contain special +Tcl_UniCharCaseMatch( +    const Tcl_UniChar *uniStr,	/* Unicode String. */ +    const Tcl_UniChar *uniPattern, +				/* Pattern, which may contain special  				 * characters. */ -    int nocase;			/* 0 for case sensitive, 1 for insensitive */ +    int nocase)			/* 0 for case sensitive, 1 for insensitive */  {      Tcl_UniChar ch1, p; -     +      while (1) { -	p = *pattern; -	 +	p = *uniPattern; +  	/* -	 * See if we're at the end of both the pattern and the string.  If -	 * so, we succeeded.  If we're at the end of the pattern but not at -	 * the end of the string, we failed. +	 * See if we're at the end of both the pattern and the string. If so, +	 * we succeeded. If we're at the end of the pattern but not at the end +	 * of the string, we failed.  	 */ -	 +  	if (p == 0) { -	    return (*string == 0); +	    return (*uniStr == 0);  	} -	if ((*string == 0) && (p != '*')) { +	if ((*uniStr == 0) && (p != '*')) {  	    return 0;  	}  	/* -	 * Check for a "*" as the next pattern character.  It matches any -	 * substring.  We handle this by skipping all the characters up to the +	 * Check for a "*" as the next pattern character. It matches any +	 * substring. We handle this by skipping all the characters up to the  	 * next matching one in the pattern, and then calling ourselves  	 * recursively for each postfix of string, until either we match or we  	 * reach the end of the string.  	 */ -	 +  	if (p == '*') {  	    /*  	     * Skip all successive *'s in the pattern  	     */ -	    while (*(++pattern) == '*') {} -	    p = *pattern; + +	    while (*(++uniPattern) == '*') { +		/* empty body */ +	    } +	    p = *uniPattern;  	    if (p == 0) {  		return 1;  	    } @@ -1652,63 +1724,67 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)  		 * quickly if the next char in the pattern isn't a special  		 * character  		 */ +  		if ((p != '[') && (p != '?') && (p != '\\')) {  		    if (nocase) { -			while (*string && (p != *string) -				&& (p != Tcl_UniCharToLower(*string))) { -			    string++; +			while (*uniStr && (p != *uniStr) +				&& (p != Tcl_UniCharToLower(*uniStr))) { +			    uniStr++;  			}  		    } else { -			while (*string && (p != *string)) { string++; } +			while (*uniStr && (p != *uniStr)) { +			    uniStr++; +			}  		    }  		} -		if (Tcl_UniCharCaseMatch(string, pattern, nocase)) { +		if (Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase)) {  		    return 1;  		} -		if (*string == 0) { +		if (*uniStr == 0) {  		    return 0;  		} -		string++; +		uniStr++;  	    }  	}  	/* -	 * Check for a "?" as the next pattern character.  It matches -	 * any single character. +	 * Check for a "?" as the next pattern character. It matches any +	 * single character.  	 */  	if (p == '?') { -	    pattern++; -	    string++; +	    uniPattern++; +	    uniStr++;  	    continue;  	}  	/* -	 * Check for a "[" as the next pattern character.  It is followed -	 * by a list of characters that are acceptable, or by a range -	 * (two characters separated by "-"). +	 * Check for a "[" as the next pattern character. It is followed by a +	 * list of characters that are acceptable, or by a range (two +	 * characters separated by "-").  	 */ -	 +  	if (p == '[') {  	    Tcl_UniChar startChar, endChar; -	    pattern++; -	    ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string); -	    string++; +	    uniPattern++; +	    ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr); +	    uniStr++;  	    while (1) { -		if ((*pattern == ']') || (*pattern == 0)) { +		if ((*uniPattern == ']') || (*uniPattern == 0)) {  		    return 0;  		} -		startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern); -		pattern++; -		if (*pattern == '-') { -		    pattern++; -		    if (*pattern == 0) { +		startChar = (nocase ? Tcl_UniCharToLower(*uniPattern) +			: *uniPattern); +		uniPattern++; +		if (*uniPattern == '-') { +		    uniPattern++; +		    if (*uniPattern == 0) {  			return 0;  		    } -		    endChar = (nocase ? Tcl_UniCharToLower(*pattern) -			    : *pattern); -		    pattern++; +		    endChar = (nocase ? Tcl_UniCharToLower(*uniPattern) +			    : *uniPattern); +		    uniPattern++;  		    if (((startChar <= ch1) && (ch1 <= endChar))  			    || ((endChar <= ch1) && (ch1 <= startChar))) {  			/* @@ -1720,42 +1796,43 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)  		    break;  		}  	    } -	    while (*pattern != ']') { -		if (*pattern == 0) { -		    pattern--; +	    while (*uniPattern != ']') { +		if (*uniPattern == 0) { +		    uniPattern--;  		    break;  		} -		pattern++; +		uniPattern++;  	    } -	    pattern++; +	    uniPattern++;  	    continue;  	}  	/* -	 * If the next pattern character is '\', just strip off the '\' -	 * so we do exact matching on the character that follows. +	 * If the next pattern character is '\', just strip off the '\' so we +	 * do exact matching on the character that follows.  	 */  	if (p == '\\') { -	    if (*(++pattern) == '\0') { +	    if (*(++uniPattern) == '\0') {  		return 0;  	    }  	}  	/* -	 * There's no special character.  Just make sure that the next -	 * bytes of each string match. +	 * There's no special character. Just make sure that the next bytes of +	 * each string match.  	 */  	if (nocase) { -	    if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) { +	    if (Tcl_UniCharToLower(*uniStr) != +		    Tcl_UniCharToLower(*uniPattern)) {  		return 0;  	    } -	} else if (*string != *pattern) { +	} else if (*uniStr != *uniPattern) {  	    return 0;  	} -	string++; -	pattern++; +	uniStr++; +	uniPattern++;      }  } @@ -1765,15 +1842,14 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)   * TclUniCharMatch --   *   *	See if a particular Unicode string matches a particular pattern. - *	Allows case insensitivity.  This is the Unicode equivalent of the - *	char* Tcl_StringCaseMatch.  This variant of Tcl_UniCharCaseMatch - *	uses counted Strings, so embedded NULLs are allowed. + *	Allows case insensitivity. This is the Unicode equivalent of the char* + *	Tcl_StringCaseMatch. This variant of Tcl_UniCharCaseMatch uses counted + *	Strings, so embedded NULLs are allowed.   *   * Results: - *	The return value is 1 if string matches pattern, and - *	0 otherwise.  The matching operation permits the following - *	special characters in the pattern: *?\[] (see the manual - *	entry for details on what these mean). + *	The return value is 1 if string matches pattern, and 0 otherwise. The + *	matching operation permits the following special characters in the + *	pattern: *?\[] (see the manual entry for details on what these mean).   *   * Side effects:   *	None. @@ -1782,25 +1858,25 @@ Tcl_UniCharCaseMatch(string, pattern, nocase)   */  int -TclUniCharMatch(string, strLen, pattern, ptnLen, nocase) -    CONST Tcl_UniChar *string;	/* Unicode String. */ -    int strLen;			/* length of String */ -    CONST Tcl_UniChar *pattern;	/* Pattern, which may contain special +TclUniCharMatch( +    const Tcl_UniChar *string,	/* Unicode String. */ +    int strLen,			/* Length of String */ +    const Tcl_UniChar *pattern,	/* Pattern, which may contain special  				 * characters. */ -    int ptnLen;			/* length of Pattern */ -    int nocase;			/* 0 for case sensitive, 1 for insensitive */ +    int ptnLen,			/* Length of Pattern */ +    int nocase)			/* 0 for case sensitive, 1 for insensitive */  { -    CONST Tcl_UniChar *stringEnd, *patternEnd; +    const Tcl_UniChar *stringEnd, *patternEnd;      Tcl_UniChar p; -    stringEnd  = string + strLen; +    stringEnd = string + strLen;      patternEnd = pattern + ptnLen;      while (1) {  	/* -	 * See if we're at the end of both the pattern and the string.  If -	 * so, we succeeded.  If we're at the end of the pattern but not at -	 * the end of the string, we failed. +	 * See if we're at the end of both the pattern and the string. If so, +	 * we succeeded. If we're at the end of the pattern but not at the end +	 * of the string, we failed.  	 */  	if (pattern == patternEnd) { @@ -1812,18 +1888,21 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)  	}  	/* -	 * Check for a "*" as the next pattern character.  It matches any -	 * substring.  We handle this by skipping all the characters up to the +	 * Check for a "*" as the next pattern character. It matches any +	 * substring. We handle this by skipping all the characters up to the  	 * next matching one in the pattern, and then calling ourselves  	 * recursively for each postfix of string, until either we match or we  	 * reach the end of the string.  	 */ -	 +  	if (p == '*') {  	    /* -	     * Skip all successive *'s in the pattern +	     * Skip all successive *'s in the pattern.  	     */ -	    while (*(++pattern) == '*') {} + +	    while (*(++pattern) == '*') { +		/* empty body */ +	    }  	    if (pattern == patternEnd) {  		return 1;  	    } @@ -1835,8 +1914,9 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)  		/*  		 * Optimization for matching - cruise through the string  		 * quickly if the next char in the pattern isn't a special -		 * character +		 * character.  		 */ +  		if ((p != '[') && (p != '?') && (p != '\\')) {  		    if (nocase) {  			while ((string < stringEnd) && (p != *string) @@ -1861,8 +1941,8 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)  	}  	/* -	 * Check for a "?" as the next pattern character.  It matches -	 * any single character. +	 * Check for a "?" as the next pattern character. It matches any +	 * single character.  	 */  	if (p == '?') { @@ -1872,11 +1952,11 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)  	}  	/* -	 * Check for a "[" as the next pattern character.  It is followed -	 * by a list of characters that are acceptable, or by a range -	 * (two characters separated by "-"). +	 * Check for a "[" as the next pattern character. It is followed by a +	 * list of characters that are acceptable, or by a range (two +	 * characters separated by "-").  	 */ -	 +  	if (p == '[') {  	    Tcl_UniChar ch1, startChar, endChar; @@ -1920,8 +2000,8 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)  	}  	/* -	 * If the next pattern character is '\', just strip off the '\' -	 * so we do exact matching on the character that follows. +	 * If the next pattern character is '\', just strip off the '\' so we +	 * do exact matching on the character that follows.  	 */  	if (p == '\\') { @@ -1931,8 +2011,8 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)  	}  	/* -	 * There's no special character.  Just make sure that the next -	 * bytes of each string match. +	 * There's no special character. Just make sure that the next bytes of +	 * each string match.  	 */  	if (nocase) { @@ -1946,3 +2026,11 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase)  	pattern++;      }  } + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ | 
