summaryrefslogtreecommitdiffstats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r--generic/tclUtf.c163
1 files changed, 118 insertions, 45 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 3937141..52b4291 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -59,7 +59,7 @@
* UTF-8.
*/
-static CONST unsigned char totalBytes[256] = {
+static const unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -213,13 +213,13 @@ three:
char *
Tcl_UniCharToUtfDString(
- CONST Tcl_UniChar *uniStr, /* Unicode string to convert to UTF-8. */
+ const Tcl_UniChar *uniStr, /* Unicode string to convert to UTF-8. */
int uniLength, /* Length of Unicode string in Tcl_UniChars
* (must be >= 0). */
Tcl_DString *dsPtr) /* UTF-8 representation of string is appended
* to this previously initialized DString. */
{
- CONST Tcl_UniChar *w, *wEnd;
+ const Tcl_UniChar *w, *wEnd;
char *p, *string;
int oldLength;
@@ -271,7 +271,7 @@ Tcl_UniCharToUtfDString(
int
Tcl_UtfToUniChar(
- register CONST char *src, /* The UTF-8 string. */
+ register const char *src, /* The UTF-8 string. */
register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
* the UTF-8 string. */
{
@@ -315,7 +315,7 @@ Tcl_UtfToUniChar(
*chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
| ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
- if (*chPtr > 0x7ff) {
+ if (*chPtr > 0x7FF) {
return 3;
}
}
@@ -370,7 +370,7 @@ Tcl_UtfToUniChar(
Tcl_UniChar *
Tcl_UtfToUniCharDString(
- CONST char *src, /* UTF-8 string to convert to Unicode. */
+ const char *src, /* UTF-8 string to convert to Unicode. */
int length, /* Length of UTF-8 string in bytes, or -1 for
* strlen(). */
Tcl_DString *dsPtr) /* Unicode representation of string is
@@ -378,7 +378,7 @@ Tcl_UtfToUniCharDString(
* DString. */
{
Tcl_UniChar *w, *wString;
- CONST char *p, *end;
+ const char *p, *end;
int oldLength;
if (length < 0) {
@@ -391,6 +391,7 @@ Tcl_UtfToUniCharDString(
*/
oldLength = Tcl_DStringLength(dsPtr);
+/* TODO: fix overreach! */
Tcl_DStringSetLength(dsPtr,
(int) ((oldLength + length + 1) * sizeof(Tcl_UniChar)));
wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);
@@ -429,7 +430,7 @@ Tcl_UtfToUniCharDString(
int
Tcl_UtfCharComplete(
- CONST char *src, /* String to check if first few bytes contain
+ const char *src, /* String to check if first few bytes contain
* a complete UTF-8 character. */
int length) /* Length of above string in bytes. */
{
@@ -459,7 +460,7 @@ Tcl_UtfCharComplete(
int
Tcl_NumUtfChars(
- register CONST char *src, /* The UTF-8 string to measure. */
+ register const char *src, /* The UTF-8 string to measure. */
int length) /* The length of the string in bytes, or -1
* for strlen(string). */
{
@@ -517,9 +518,9 @@ Tcl_NumUtfChars(
*---------------------------------------------------------------------------
*/
-CONST char *
+const char *
Tcl_UtfFindFirst(
- CONST char *src, /* The UTF-8 string to be searched. */
+ const char *src, /* The UTF-8 string to be searched. */
int ch) /* The Tcl_UniChar to search for. */
{
int len;
@@ -556,14 +557,14 @@ Tcl_UtfFindFirst(
*---------------------------------------------------------------------------
*/
-CONST char *
+const char *
Tcl_UtfFindLast(
- CONST char *src, /* The UTF-8 string to be searched. */
+ const char *src, /* The UTF-8 string to be searched. */
int ch) /* The Tcl_UniChar to search for. */
{
int len;
Tcl_UniChar find;
- CONST char *last;
+ const char *last;
last = NULL;
while (1) {
@@ -598,9 +599,9 @@ Tcl_UtfFindLast(
*---------------------------------------------------------------------------
*/
-CONST char *
+const char *
Tcl_UtfNext(
- CONST char *src) /* The current location in the string. */
+ const char *src) /* The current location in the string. */
{
Tcl_UniChar ch;
@@ -628,13 +629,13 @@ Tcl_UtfNext(
*---------------------------------------------------------------------------
*/
-CONST char *
+const char *
Tcl_UtfPrev(
- CONST char *src, /* The current location in the string. */
- CONST char *start) /* Pointer to the beginning of the string, to
+ const char *src, /* The current location in the string. */
+ const char *start) /* Pointer to the beginning of the string, to
* avoid going backwards too far. */
{
- CONST char *look;
+ const char *look;
int i, byte;
src--;
@@ -677,10 +678,10 @@ Tcl_UtfPrev(
Tcl_UniChar
Tcl_UniCharAtIndex(
- register CONST char *src, /* The UTF-8 string to dereference. */
+ register const char *src, /* The UTF-8 string to dereference. */
register int index) /* The position of the desired character. */
{
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
while (index >= 0) {
index--;
@@ -706,9 +707,9 @@ Tcl_UniCharAtIndex(
*---------------------------------------------------------------------------
*/
-CONST char *
+const char *
Tcl_UtfAtIndex(
- register CONST char *src, /* The UTF-8 string. */
+ register const char *src, /* The UTF-8 string. */
register int index) /* The position of the desired character. */
{
Tcl_UniChar ch;
@@ -748,7 +749,7 @@ Tcl_UtfAtIndex(
int
Tcl_UtfBackslash(
- CONST char *src, /* Points to the backslash character of a
+ const char *src, /* Points to the backslash character of a
* backslash sequence. */
int *readPtr, /* Fill in with number of characters read from
* src, unless NULL. */
@@ -960,8 +961,8 @@ Tcl_UtfToTitle(
int
TclpUtfNcmp2(
- CONST char *cs, /* UTF string to compare to ct. */
- CONST char *ct, /* UTF string cs is compared to. */
+ const char *cs, /* UTF string to compare to ct. */
+ const char *ct, /* UTF string cs is compared to. */
unsigned long numBytes) /* Number of *bytes* to compare. */
{
/*
@@ -1007,8 +1008,8 @@ TclpUtfNcmp2(
int
Tcl_UtfNcmp(
- CONST char *cs, /* UTF string to compare to ct. */
- CONST char *ct, /* UTF string cs is compared to. */
+ const char *cs, /* UTF string to compare to ct. */
+ const char *ct, /* UTF string cs is compared to. */
unsigned long numChars) /* Number of UTF chars to compare. */
{
Tcl_UniChar ch1, ch2;
@@ -1055,8 +1056,8 @@ Tcl_UtfNcmp(
int
Tcl_UtfNcasecmp(
- CONST char *cs, /* UTF string to compare to ct. */
- CONST char *ct, /* UTF string cs is compared to. */
+ const char *cs, /* UTF string to compare to ct. */
+ const char *ct, /* UTF string cs is compared to. */
unsigned long numChars) /* Number of UTF chars to compare. */
{
Tcl_UniChar ch1, ch2;
@@ -1099,8 +1100,8 @@ Tcl_UtfNcasecmp(
int
TclUtfCasecmp(
- CONST char *cs, /* UTF string to compare to ct. */
- CONST char *ct) /* UTF string cs is compared to. */
+ const char *cs, /* UTF string to compare to ct. */
+ const char *ct) /* UTF string cs is compared to. */
{
while (*cs && *ct) {
Tcl_UniChar ch1, ch2;
@@ -1229,7 +1230,7 @@ Tcl_UniCharToTitle(
int
Tcl_UniCharLen(
- CONST Tcl_UniChar *uniStr) /* Unicode string to find length of. */
+ const Tcl_UniChar *uniStr) /* Unicode string to find length of. */
{
int len = 0;
@@ -1259,8 +1260,8 @@ Tcl_UniCharLen(
int
Tcl_UniCharNcmp(
- CONST Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
- CONST Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
+ const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
+ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
unsigned long numChars) /* Number of unichars to compare. */
{
#ifdef WORDS_BIGENDIAN
@@ -1304,8 +1305,8 @@ Tcl_UniCharNcmp(
int
Tcl_UniCharNcasecmp(
- CONST Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
- CONST Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
+ const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
+ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
unsigned long numChars) /* Number of unichars to compare. */
{
for ( ; numChars != 0; numChars--, ucs++, uct++) {
@@ -1341,6 +1342,11 @@ int
Tcl_UniCharIsAlnum(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ if (UNICODE_OUT_OF_RANGE(ch)) {
+ return 0;
+ }
+#endif
return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1);
}
@@ -1364,6 +1370,11 @@ int
Tcl_UniCharIsAlpha(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ if (UNICODE_OUT_OF_RANGE(ch)) {
+ return 0;
+ }
+#endif
return ((ALPHA_BITS >> GetCategory(ch)) & 1);
}
@@ -1387,6 +1398,18 @@ int
Tcl_UniCharIsControl(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ if (UNICODE_OUT_OF_RANGE(ch)) {
+ ch &= 0x1FFFFF;
+ if ((ch == 0xE0001) || ((ch >= 0xE0020) && (ch <= 0xE007f))) {
+ return 1;
+ }
+ if ((ch >= 0xF0000) && ((ch & 0xFFFF) <= 0xFFFD)) {
+ return 1;
+ }
+ return 0;
+ }
+#endif
return ((CONTROL_BITS >> GetCategory(ch)) & 1);
}
@@ -1410,6 +1433,11 @@ int
Tcl_UniCharIsDigit(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ if (UNICODE_OUT_OF_RANGE(ch)) {
+ return 0;
+ }
+#endif
return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER);
}
@@ -1433,6 +1461,12 @@ int
Tcl_UniCharIsGraph(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ if (UNICODE_OUT_OF_RANGE(ch)) {
+ ch &= 0x1FFFFF;
+ return (ch >= 0xE0100) && (ch <= 0xE01EF);
+ }
+#endif
return ((GRAPH_BITS >> GetCategory(ch)) & 1);
}
@@ -1456,6 +1490,11 @@ int
Tcl_UniCharIsLower(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ if (UNICODE_OUT_OF_RANGE(ch)) {
+ return 0;
+ }
+#endif
return (GetCategory(ch) == LOWERCASE_LETTER);
}
@@ -1479,6 +1518,12 @@ int
Tcl_UniCharIsPrint(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ if (UNICODE_OUT_OF_RANGE(ch)) {
+ ch &= 0x1FFFFF;
+ return (ch >= 0xE0100) && (ch <= 0xE01EF);
+ }
+#endif
return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1);
}
@@ -1502,6 +1547,11 @@ int
Tcl_UniCharIsPunct(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ if (UNICODE_OUT_OF_RANGE(ch)) {
+ return 0;
+ }
+#endif
return ((PUNCT_BITS >> GetCategory(ch)) & 1);
}
@@ -1525,14 +1575,27 @@ int
Tcl_UniCharIsSpace(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ /* Ignore upper 11 bits. */
+ ch &= 0x1FFFFF;
+#else
+ /* Ignore upper 16 bits. */
+ ch &= 0xFFFF;
+#endif
+
/*
* If the character is within the first 127 characters, just use the
* standard C function, otherwise consult the Unicode table.
*/
- if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) {
+ if (ch < 0x80) {
return TclIsSpaceProc((char) ch);
- } else if ((Tcl_UniChar) ch == 0x180E || (Tcl_UniChar) ch == 0x202F) {
+#if TCL_UTF_MAX > 3
+ } else if (UNICODE_OUT_OF_RANGE(ch)) {
+ return 0;
+#endif
+ } else if (ch == 0x0085 || ch == 0x180E || ch == 0x200B
+ || ch == 0x202F || ch == 0x2060 || ch == 0xFEFF) {
return 1;
} else {
return ((SPACE_BITS >> GetCategory(ch)) & 1);
@@ -1559,6 +1622,11 @@ int
Tcl_UniCharIsUpper(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ if (UNICODE_OUT_OF_RANGE(ch)) {
+ return 0;
+ }
+#endif
return (GetCategory(ch) == UPPERCASE_LETTER);
}
@@ -1582,6 +1650,11 @@ int
Tcl_UniCharIsWordChar(
int ch) /* Unicode character to test. */
{
+#if TCL_UTF_MAX > 3
+ if (UNICODE_OUT_OF_RANGE(ch)) {
+ return 0;
+ }
+#endif
return ((WORD_BITS >> GetCategory(ch)) & 1);
}
@@ -1610,8 +1683,8 @@ Tcl_UniCharIsWordChar(
int
Tcl_UniCharCaseMatch(
- CONST Tcl_UniChar *uniStr, /* Unicode String. */
- CONST Tcl_UniChar *uniPattern,
+ const Tcl_UniChar *uniStr, /* Unicode String. */
+ const Tcl_UniChar *uniPattern,
/* Pattern, which may contain special
* characters. */
int nocase) /* 0 for case sensitive, 1 for insensitive */
@@ -1798,14 +1871,14 @@ Tcl_UniCharCaseMatch(
int
TclUniCharMatch(
- CONST Tcl_UniChar *string, /* Unicode String. */
+ const Tcl_UniChar *string, /* Unicode String. */
int strLen, /* Length of String */
- CONST Tcl_UniChar *pattern, /* Pattern, which may contain special
+ const Tcl_UniChar *pattern, /* Pattern, which may contain special
* characters. */
int ptnLen, /* Length of Pattern */
int nocase) /* 0 for case sensitive, 1 for insensitive */
{
- CONST Tcl_UniChar *stringEnd, *patternEnd;
+ const Tcl_UniChar *stringEnd, *patternEnd;
Tcl_UniChar p;
stringEnd = string + strLen;