diff options
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 121 |
1 files changed, 64 insertions, 57 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 11bde5c..ac76309 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -119,7 +119,7 @@ static int Invalid(const char *src); *--------------------------------------------------------------------------- */ -int +size_t TclUtfCount( int ch) /* The Unicode character whose size is returned. */ { @@ -314,13 +314,13 @@ three: char * Tcl_UniCharToUtfDString( const int *uniStr, /* Unicode string to convert to UTF-8. */ - int uniLength, /* Length of Unicode string. */ + size_t uniLength, /* Length of Unicode string. */ Tcl_DString *dsPtr) /* UTF-8 representation of string is appended * to this previously initialized DString. */ { const int *w, *wEnd; char *p, *string; - int oldLength; + size_t oldLength; /* * UTF-8 string length in bytes will be <= Unicode string length * 4. @@ -329,7 +329,7 @@ Tcl_UniCharToUtfDString( if (uniStr == NULL) { return NULL; } - if (uniLength < 0) { + if (uniLength == TCL_AUTO_LENGTH) { uniLength = 0; w = uniStr; while (*w != '\0') { @@ -355,13 +355,14 @@ Tcl_UniCharToUtfDString( char * Tcl_Char16ToUtfDString( const unsigned short *uniStr,/* Utf-16 string to convert to UTF-8. */ - int uniLength, /* Length of Utf-16 string. */ + size_t uniLength, /* Length of Utf-16 string. */ Tcl_DString *dsPtr) /* UTF-8 representation of string is appended * to this previously initialized DString. */ { const unsigned short *w, *wEnd; char *p, *string; - int oldLength, len = 1; + size_t oldLength; + int len = 1; /* * UTF-8 string length in bytes will be <= Utf16 string length * 3. @@ -370,7 +371,7 @@ Tcl_Char16ToUtfDString( if (uniStr == NULL) { return NULL; } - if (uniLength < 0) { + if (uniLength == TCL_AUTO_LENGTH) { uniLength = 0; w = uniStr; @@ -421,7 +422,7 @@ Tcl_Char16ToUtfDString( * Tcl_UtfCharComplete() before calling this routine to ensure that * enough bytes remain in the string. * - * If TCL_UTF_MAX <= 4, special handling of Surrogate pairs is done: + * If TCL_UTF_MAX <= 3, special handling of Surrogate pairs is done: * For any UTF-8 string containing a character outside of the BMP, the * first call to this function will fill *chPtr with the high surrogate * and generate a return value of 1. Calling Tcl_UtfToUniChar again @@ -654,7 +655,7 @@ Tcl_UtfToChar16( int * Tcl_UtfToUniCharDString( const char *src, /* UTF-8 string to convert to Unicode. */ - int length, /* Length of UTF-8 string in bytes, or -1 for + size_t length, /* Length of UTF-8 string in bytes, or -1 for * strlen(). */ Tcl_DString *dsPtr) /* Unicode representation of string is * appended to this previously initialized @@ -662,7 +663,7 @@ Tcl_UtfToUniCharDString( { int ch = 0, *w, *wString; const char *p; - int oldLength; + size_t oldLength; /* Pointer to the end of string. Never read endPtr[0] */ const char *endPtr = src + length; /* Pointer to last byte where optimization still can be used */ @@ -671,7 +672,7 @@ Tcl_UtfToUniCharDString( if (src == NULL) { return NULL; } - if (length < 0) { + if (length == TCL_AUTO_LENGTH) { length = strlen(src); } @@ -711,7 +712,7 @@ Tcl_UtfToUniCharDString( unsigned short * Tcl_UtfToChar16DString( const char *src, /* UTF-8 string to convert to Unicode. */ - int length, /* Length of UTF-8 string in bytes, or -1 for + size_t length, /* Length of UTF-8 string in bytes, or -1 for * strlen(). */ Tcl_DString *dsPtr) /* Unicode representation of string is * appended to this previously initialized @@ -719,7 +720,7 @@ Tcl_UtfToChar16DString( { unsigned short ch = 0, *w, *wString; const char *p; - int oldLength; + size_t oldLength; /* Pointer to the end of string. Never read endPtr[0] */ const char *endPtr = src + length; /* Pointer to last byte where optimization still can be used */ @@ -728,12 +729,12 @@ Tcl_UtfToChar16DString( if (src == NULL) { return NULL; } - if (length < 0) { + if (length == TCL_AUTO_LENGTH) { length = strlen(src); } /* - * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in + * Unicode string length in WCHARs will be <= UTF-8 string length in * bytes. */ @@ -789,7 +790,7 @@ int Tcl_UtfCharComplete( const char *src, /* String to check if first few bytes contain * a complete UTF-8 character. */ - int length) /* Length of above string in bytes. */ + size_t length) /* Length of above string in bytes. */ { return length >= complete[UCHAR(*src)]; } @@ -812,18 +813,18 @@ Tcl_UtfCharComplete( *--------------------------------------------------------------------------- */ -int +size_t Tcl_NumUtfChars( const char *src, /* The UTF-8 string to measure. */ - int length) /* The length of the string in bytes, or -1 - * for strlen(string). */ + size_t length) /* The length of the string in bytes, or + * TCL_AUTO_LENGTH for strlen(src). */ { Tcl_UniChar ch = 0; - int i = 0; + size_t i = 0; - if (length < 0) { + if (length == TCL_AUTO_LENGTH) { /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */ - while ((*src != '\0') && (i < INT_MAX)) { + while (*src != '\0') { src += TclUtfToUniChar(src, &ch); i++; } @@ -966,7 +967,7 @@ const char * Tcl_UtfNext( const char *src) /* The current location in the string. */ { - int left; + size_t left; const char *next; if (((*src) & 0xC0) == 0x80) { @@ -1140,15 +1141,15 @@ Tcl_UtfPrev( int Tcl_UniCharAtIndex( const char *src, /* The UTF-8 string to dereference. */ - int index) /* The position of the desired character. */ + size_t index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; int i = 0; - if (index < 0) { + if (index == TCL_INDEX_NONE) { return -1; } - while (index-- > 0) { + while (index--) { i = TclUtfToUniChar(src, &ch); src += i; } @@ -1184,21 +1185,28 @@ Tcl_UniCharAtIndex( const char * Tcl_UtfAtIndex( const char *src, /* The UTF-8 string. */ - int index) /* The position of the desired character. */ + size_t index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; - int len = 0; +#if TCL_UTF_MAX <= 3 + size_t len = 0; +#endif - while (index-- > 0) { - len = TclUtfToUniChar(src, &ch); - src += len; - } + if (index != TCL_INDEX_NONE) { + while (index--) { +#if TCL_UTF_MAX <= 3 + src += (len = TclUtfToUniChar(src, &ch)); +#else + src += TclUtfToUniChar(src, &ch); +#endif + } #if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { /* Index points at character following high Surrogate */ src += TclUtfToUniChar(src, &ch); } #endif + } return src; } @@ -1228,7 +1236,7 @@ Tcl_UtfAtIndex( *--------------------------------------------------------------------------- */ -int +size_t Tcl_UtfBackslash( const char *src, /* Points to the backslash character of a * backslash sequence. */ @@ -1238,8 +1246,7 @@ Tcl_UtfBackslash( * backslash sequence. */ { #define LINE_LENGTH 128 - int numRead; - int result; + size_t numRead, result; result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst); if (numRead == LINE_LENGTH) { @@ -1279,7 +1286,7 @@ Tcl_UtfToUpper( { int ch, upChar; char *src, *dst; - int len; + size_t len; /* * Iterate over the string until we hit the terminating null. @@ -1332,7 +1339,7 @@ Tcl_UtfToLower( { int ch, lowChar; char *src, *dst; - int len; + size_t len; /* * Iterate over the string until we hit the terminating null. @@ -1386,7 +1393,7 @@ Tcl_UtfToTitle( { int ch, titleChar, lowChar; char *src, *dst; - int len; + size_t len; /* * Capitalize the first character and then lowercase the rest of the @@ -1448,7 +1455,7 @@ int TclpUtfNcmp2( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ - unsigned long numBytes) /* Number of *bytes* to compare. */ + size_t numBytes) /* Number of *bytes* to compare. */ { /* * We can't simply call 'memcmp(cs, ct, numBytes);' because we need to @@ -1495,7 +1502,7 @@ int Tcl_UtfNcmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ - unsigned long numChars) /* Number of UTF chars to compare. */ + size_t numChars) /* Number of UTF chars to compare. */ { Tcl_UniChar ch1 = 0, ch2 = 0; @@ -1553,7 +1560,7 @@ int Tcl_UtfNcasecmp( const char *cs, /* UTF string to compare to ct. */ const char *ct, /* UTF string cs is compared to. */ - unsigned long numChars) /* Number of UTF chars to compare. */ + size_t numChars) /* Number of UTF chars to compare. */ { Tcl_UniChar ch1 = 0, ch2 = 0; @@ -1788,7 +1795,7 @@ Tcl_UniCharToTitle( /* *---------------------------------------------------------------------- * - * Tcl_UniCharLen -- + * TclUniCharLen -- * * Find the length of a UniChar string. The str input must be null * terminated. @@ -1802,11 +1809,11 @@ Tcl_UniCharToTitle( *---------------------------------------------------------------------- */ -int -Tcl_UniCharLen( +size_t +TclUniCharLen( const Tcl_UniChar *uniStr) /* Unicode string to find length of. */ { - int len = 0; + size_t len = 0; while (*uniStr != '\0') { len++; @@ -1818,7 +1825,7 @@ Tcl_UniCharLen( /* *---------------------------------------------------------------------- * - * Tcl_UniCharNcmp -- + * TclUniCharNcmp -- * * Compare at most numChars unichars of string ucs to string uct. * Both ucs and uct are assumed to be at least numChars unichars long. @@ -1833,10 +1840,10 @@ Tcl_UniCharLen( */ int -Tcl_UniCharNcmp( +TclUniCharNcmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ - unsigned long numChars) /* Number of unichars to compare. */ + size_t numChars) /* Number of unichars to compare. */ { #ifdef WORDS_BIGENDIAN /* @@ -1862,7 +1869,7 @@ Tcl_UniCharNcmp( /* *---------------------------------------------------------------------- * - * Tcl_UniCharNcasecmp -- + * TclUniCharNcasecmp -- * * Compare at most numChars unichars of string ucs to string uct case * insensitive. Both ucs and uct are assumed to be at least numChars @@ -1878,10 +1885,10 @@ Tcl_UniCharNcmp( */ int -Tcl_UniCharNcasecmp( +TclUniCharNcasecmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ - unsigned long numChars) /* Number of unichars to compare. */ + size_t numChars) /* Number of unichars to compare. */ { for ( ; numChars != 0; numChars--, ucs++, uct++) { if (*ucs != *uct) { @@ -2207,7 +2214,7 @@ Tcl_UniCharIsWordChar( /* *---------------------------------------------------------------------- * - * Tcl_UniCharCaseMatch -- + * TclUniCharCaseMatch -- * * See if a particular Unicode string matches a particular pattern. * Allows case insensitivity. This is the Unicode equivalent of the char* @@ -2228,7 +2235,7 @@ Tcl_UniCharIsWordChar( */ int -Tcl_UniCharCaseMatch( +TclUniCharCaseMatch( const Tcl_UniChar *uniStr, /* Unicode String. */ const Tcl_UniChar *uniPattern, /* Pattern, which may contain special @@ -2295,7 +2302,7 @@ Tcl_UniCharCaseMatch( } } } - if (Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase)) { + if (TclUniCharCaseMatch(uniStr, uniPattern, nocase)) { return 1; } if (*uniStr == 0) { @@ -2401,7 +2408,7 @@ Tcl_UniCharCaseMatch( * * See if a particular Unicode string matches a particular pattern. * Allows case insensitivity. This is the Unicode equivalent of the char* - * Tcl_StringCaseMatch. This variant of Tcl_UniCharCaseMatch uses counted + * Tcl_StringCaseMatch. This variant of TclUniCharCaseMatch uses counted * Strings, so embedded NULLs are allowed. * * Results: @@ -2418,10 +2425,10 @@ Tcl_UniCharCaseMatch( int TclUniCharMatch( const Tcl_UniChar *string, /* Unicode String. */ - int strLen, /* Length of String */ + size_t strLen, /* Length of String */ const Tcl_UniChar *pattern, /* Pattern, which may contain special * characters. */ - int ptnLen, /* Length of Pattern */ + size_t ptnLen, /* Length of Pattern */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { const Tcl_UniChar *stringEnd, *patternEnd; |