diff options
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 163 |
1 files changed, 118 insertions, 45 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 3937141..52b4291 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -59,7 +59,7 @@ * UTF-8. */ -static CONST unsigned char totalBytes[256] = { +static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -213,13 +213,13 @@ three: char * Tcl_UniCharToUtfDString( - CONST Tcl_UniChar *uniStr, /* Unicode string to convert to UTF-8. */ + const Tcl_UniChar *uniStr, /* Unicode string to convert to UTF-8. */ int uniLength, /* Length of Unicode string in Tcl_UniChars * (must be >= 0). */ Tcl_DString *dsPtr) /* UTF-8 representation of string is appended * to this previously initialized DString. */ { - CONST Tcl_UniChar *w, *wEnd; + const Tcl_UniChar *w, *wEnd; char *p, *string; int oldLength; @@ -271,7 +271,7 @@ Tcl_UniCharToUtfDString( int Tcl_UtfToUniChar( - register CONST char *src, /* The UTF-8 string. */ + register const char *src, /* The UTF-8 string. */ register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by * the UTF-8 string. */ { @@ -315,7 +315,7 @@ Tcl_UtfToUniChar( *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12) | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); - if (*chPtr > 0x7ff) { + if (*chPtr > 0x7FF) { return 3; } } @@ -370,7 +370,7 @@ Tcl_UtfToUniChar( Tcl_UniChar * Tcl_UtfToUniCharDString( - CONST char *src, /* UTF-8 string to convert to Unicode. */ + const char *src, /* UTF-8 string to convert to Unicode. */ int length, /* Length of UTF-8 string in bytes, or -1 for * strlen(). */ Tcl_DString *dsPtr) /* Unicode representation of string is @@ -378,7 +378,7 @@ Tcl_UtfToUniCharDString( * DString. */ { Tcl_UniChar *w, *wString; - CONST char *p, *end; + const char *p, *end; int oldLength; if (length < 0) { @@ -391,6 +391,7 @@ Tcl_UtfToUniCharDString( */ oldLength = Tcl_DStringLength(dsPtr); +/* TODO: fix overreach! */ Tcl_DStringSetLength(dsPtr, (int) ((oldLength + length + 1) * sizeof(Tcl_UniChar))); wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength); @@ -429,7 +430,7 @@ Tcl_UtfToUniCharDString( int Tcl_UtfCharComplete( - CONST char *src, /* String to check if first few bytes contain + const char *src, /* String to check if first few bytes contain * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { @@ -459,7 +460,7 @@ Tcl_UtfCharComplete( int Tcl_NumUtfChars( - register CONST char *src, /* The UTF-8 string to measure. */ + register const char *src, /* The UTF-8 string to measure. */ int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { @@ -517,9 +518,9 @@ Tcl_NumUtfChars( *--------------------------------------------------------------------------- */ -CONST char * +const char * Tcl_UtfFindFirst( - CONST char *src, /* The UTF-8 string to be searched. */ + const char *src, /* The UTF-8 string to be searched. */ int ch) /* The Tcl_UniChar to search for. */ { int len; @@ -556,14 +557,14 @@ Tcl_UtfFindFirst( *--------------------------------------------------------------------------- */ -CONST char * +const char * Tcl_UtfFindLast( - CONST char *src, /* The UTF-8 string to be searched. */ + const char *src, /* The UTF-8 string to be searched. */ int ch) /* The Tcl_UniChar to search for. */ { int len; Tcl_UniChar find; - CONST char *last; + const char *last; last = NULL; while (1) { @@ -598,9 +599,9 @@ Tcl_UtfFindLast( *--------------------------------------------------------------------------- */ -CONST char * +const char * Tcl_UtfNext( - CONST char *src) /* The current location in the string. */ + const char *src) /* The current location in the string. */ { Tcl_UniChar ch; @@ -628,13 +629,13 @@ Tcl_UtfNext( *--------------------------------------------------------------------------- */ -CONST char * +const char * Tcl_UtfPrev( - CONST char *src, /* The current location in the string. */ - CONST char *start) /* Pointer to the beginning of the string, to + const char *src, /* The current location in the string. */ + const char *start) /* Pointer to the beginning of the string, to * avoid going backwards too far. */ { - CONST char *look; + const char *look; int i, byte; src--; @@ -677,10 +678,10 @@ Tcl_UtfPrev( Tcl_UniChar Tcl_UniCharAtIndex( - register CONST char *src, /* The UTF-8 string to dereference. */ + register const char *src, /* The UTF-8 string to dereference. */ register int index) /* The position of the desired character. */ { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; while (index >= 0) { index--; @@ -706,9 +707,9 @@ Tcl_UniCharAtIndex( *--------------------------------------------------------------------------- */ -CONST char * +const char * Tcl_UtfAtIndex( - register CONST char *src, /* The UTF-8 string. */ + register const char *src, /* The UTF-8 string. */ register int index) /* The position of the desired character. */ { Tcl_UniChar ch; @@ -748,7 +749,7 @@ Tcl_UtfAtIndex( int Tcl_UtfBackslash( - CONST char *src, /* Points to the backslash character of a + const char *src, /* Points to the backslash character of a * backslash sequence. */ int *readPtr, /* Fill in with number of characters read from * src, unless NULL. */ @@ -960,8 +961,8 @@ Tcl_UtfToTitle( int TclpUtfNcmp2( - CONST char *cs, /* UTF string to compare to ct. */ - CONST char *ct, /* UTF string cs is compared to. */ + const char *cs, /* UTF string to compare to ct. */ + const char *ct, /* UTF string cs is compared to. */ unsigned long numBytes) /* Number of *bytes* to compare. */ { /* @@ -1007,8 +1008,8 @@ TclpUtfNcmp2( int Tcl_UtfNcmp( - CONST char *cs, /* UTF string to compare to ct. */ - CONST char *ct, /* UTF string cs is compared to. */ + const char *cs, /* UTF string to compare to ct. */ + const char *ct, /* UTF string cs is compared to. */ unsigned long numChars) /* Number of UTF chars to compare. */ { Tcl_UniChar ch1, ch2; @@ -1055,8 +1056,8 @@ Tcl_UtfNcmp( int Tcl_UtfNcasecmp( - CONST char *cs, /* UTF string to compare to ct. */ - CONST char *ct, /* UTF string cs is compared to. */ + const char *cs, /* UTF string to compare to ct. */ + const char *ct, /* UTF string cs is compared to. */ unsigned long numChars) /* Number of UTF chars to compare. */ { Tcl_UniChar ch1, ch2; @@ -1099,8 +1100,8 @@ Tcl_UtfNcasecmp( int TclUtfCasecmp( - CONST char *cs, /* UTF string to compare to ct. */ - CONST char *ct) /* UTF string cs is compared to. */ + const char *cs, /* UTF string to compare to ct. */ + const char *ct) /* UTF string cs is compared to. */ { while (*cs && *ct) { Tcl_UniChar ch1, ch2; @@ -1229,7 +1230,7 @@ Tcl_UniCharToTitle( int Tcl_UniCharLen( - CONST Tcl_UniChar *uniStr) /* Unicode string to find length of. */ + const Tcl_UniChar *uniStr) /* Unicode string to find length of. */ { int len = 0; @@ -1259,8 +1260,8 @@ Tcl_UniCharLen( int Tcl_UniCharNcmp( - CONST Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ - CONST Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ + const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ + const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { #ifdef WORDS_BIGENDIAN @@ -1304,8 +1305,8 @@ Tcl_UniCharNcmp( int Tcl_UniCharNcasecmp( - CONST Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ - CONST Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ + const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ + const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { for ( ; numChars != 0; numChars--, ucs++, uct++) { @@ -1341,6 +1342,11 @@ int Tcl_UniCharIsAlnum( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1); } @@ -1364,6 +1370,11 @@ int Tcl_UniCharIsAlpha( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return ((ALPHA_BITS >> GetCategory(ch)) & 1); } @@ -1387,6 +1398,18 @@ int Tcl_UniCharIsControl( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + ch &= 0x1FFFFF; + if ((ch == 0xE0001) || ((ch >= 0xE0020) && (ch <= 0xE007f))) { + return 1; + } + if ((ch >= 0xF0000) && ((ch & 0xFFFF) <= 0xFFFD)) { + return 1; + } + return 0; + } +#endif return ((CONTROL_BITS >> GetCategory(ch)) & 1); } @@ -1410,6 +1433,11 @@ int Tcl_UniCharIsDigit( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER); } @@ -1433,6 +1461,12 @@ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + ch &= 0x1FFFFF; + return (ch >= 0xE0100) && (ch <= 0xE01EF); + } +#endif return ((GRAPH_BITS >> GetCategory(ch)) & 1); } @@ -1456,6 +1490,11 @@ int Tcl_UniCharIsLower( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return (GetCategory(ch) == LOWERCASE_LETTER); } @@ -1479,6 +1518,12 @@ int Tcl_UniCharIsPrint( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + ch &= 0x1FFFFF; + return (ch >= 0xE0100) && (ch <= 0xE01EF); + } +#endif return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1); } @@ -1502,6 +1547,11 @@ int Tcl_UniCharIsPunct( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return ((PUNCT_BITS >> GetCategory(ch)) & 1); } @@ -1525,14 +1575,27 @@ int Tcl_UniCharIsSpace( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + /* Ignore upper 11 bits. */ + ch &= 0x1FFFFF; +#else + /* Ignore upper 16 bits. */ + ch &= 0xFFFF; +#endif + /* * If the character is within the first 127 characters, just use the * standard C function, otherwise consult the Unicode table. */ - if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) { + if (ch < 0x80) { return TclIsSpaceProc((char) ch); - } else if ((Tcl_UniChar) ch == 0x180E || (Tcl_UniChar) ch == 0x202F) { +#if TCL_UTF_MAX > 3 + } else if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; +#endif + } else if (ch == 0x0085 || ch == 0x180E || ch == 0x200B + || ch == 0x202F || ch == 0x2060 || ch == 0xFEFF) { return 1; } else { return ((SPACE_BITS >> GetCategory(ch)) & 1); @@ -1559,6 +1622,11 @@ int Tcl_UniCharIsUpper( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return (GetCategory(ch) == UPPERCASE_LETTER); } @@ -1582,6 +1650,11 @@ int Tcl_UniCharIsWordChar( int ch) /* Unicode character to test. */ { +#if TCL_UTF_MAX > 3 + if (UNICODE_OUT_OF_RANGE(ch)) { + return 0; + } +#endif return ((WORD_BITS >> GetCategory(ch)) & 1); } @@ -1610,8 +1683,8 @@ Tcl_UniCharIsWordChar( int Tcl_UniCharCaseMatch( - CONST Tcl_UniChar *uniStr, /* Unicode String. */ - CONST Tcl_UniChar *uniPattern, + const Tcl_UniChar *uniStr, /* Unicode String. */ + const Tcl_UniChar *uniPattern, /* Pattern, which may contain special * characters. */ int nocase) /* 0 for case sensitive, 1 for insensitive */ @@ -1798,14 +1871,14 @@ Tcl_UniCharCaseMatch( int TclUniCharMatch( - CONST Tcl_UniChar *string, /* Unicode String. */ + const Tcl_UniChar *string, /* Unicode String. */ int strLen, /* Length of String */ - CONST Tcl_UniChar *pattern, /* Pattern, which may contain special + const Tcl_UniChar *pattern, /* Pattern, which may contain special * characters. */ int ptnLen, /* Length of Pattern */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { - CONST Tcl_UniChar *stringEnd, *patternEnd; + const Tcl_UniChar *stringEnd, *patternEnd; Tcl_UniChar p; stringEnd = string + strLen; |