summaryrefslogtreecommitdiffstats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r--generic/tclUtf.c189
1 files changed, 114 insertions, 75 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b9e1226..e5497a4 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -59,7 +59,7 @@
* UTF-8.
*/
-static const unsigned char totalBytes[256] = {
+static CONST unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -231,13 +231,13 @@ Tcl_UniCharToUtf(
char *
Tcl_UniCharToUtfDString(
- const Tcl_UniChar *uniStr, /* Unicode string to convert to UTF-8. */
- size_t uniLength, /* Length of Unicode string in Tcl_UniChars
+ CONST Tcl_UniChar *uniStr, /* Unicode string to convert to UTF-8. */
+ int uniLength, /* Length of Unicode string in Tcl_UniChars
* (must be >= 0). */
Tcl_DString *dsPtr) /* UTF-8 representation of string is appended
* to this previously initialized DString. */
{
- const Tcl_UniChar *w, *wEnd;
+ CONST Tcl_UniChar *w, *wEnd;
char *p, *string;
int oldLength;
@@ -289,7 +289,7 @@ Tcl_UniCharToUtfDString(
int
Tcl_UtfToUniChar(
- register const char *src, /* The UTF-8 string. */
+ register CONST char *src, /* The UTF-8 string. */
register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
* the UTF-8 string. */
{
@@ -393,18 +393,18 @@ Tcl_UtfToUniChar(
Tcl_UniChar *
Tcl_UtfToUniCharDString(
- const char *src, /* UTF-8 string to convert to Unicode. */
- size_t length, /* Length of UTF-8 string in bytes, or
- * TCL_STRLEN for strlen(). */
+ CONST char *src, /* UTF-8 string to convert to Unicode. */
+ int length, /* Length of UTF-8 string in bytes, or -1 for
+ * strlen(). */
Tcl_DString *dsPtr) /* Unicode representation of string is
* appended to this previously initialized
* DString. */
{
Tcl_UniChar *w, *wString;
- const char *p, *end;
- size_t oldLength;
+ CONST char *p, *end;
+ int oldLength;
- if (length == TCL_STRLEN) {
+ if (length < 0) {
length = strlen(src);
}
@@ -414,9 +414,8 @@ Tcl_UtfToUniCharDString(
*/
oldLength = Tcl_DStringLength(dsPtr);
-/* TODO: fix overreach! */
Tcl_DStringSetLength(dsPtr,
- (oldLength + length + 1) * sizeof(Tcl_UniChar));
+ (int) ((oldLength + length + 1) * sizeof(Tcl_UniChar)));
wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);
w = wString;
@@ -427,7 +426,7 @@ Tcl_UtfToUniCharDString(
}
*w = '\0';
Tcl_DStringSetLength(dsPtr,
- oldLength + ((char *) w - (char *) wString));
+ (oldLength + ((char *) w - (char *) wString)));
return wString;
}
@@ -453,9 +452,9 @@ Tcl_UtfToUniCharDString(
int
Tcl_UtfCharComplete(
- const char *src, /* String to check if first few bytes contain
+ CONST char *src, /* String to check if first few bytes contain
* a complete UTF-8 character. */
- size_t length) /* Length of above string in bytes. */
+ int length) /* Length of above string in bytes. */
{
int ch;
@@ -481,11 +480,11 @@ Tcl_UtfCharComplete(
*---------------------------------------------------------------------------
*/
-size_t
+int
Tcl_NumUtfChars(
- register const char *src, /* The UTF-8 string to measure. */
- size_t length) /* The length of the string in bytes, or
- * TCL_STRLEN for strlen(string). */
+ register CONST char *src, /* The UTF-8 string to measure. */
+ int length) /* The length of the string in bytes, or -1
+ * for strlen(string). */
{
Tcl_UniChar ch;
register Tcl_UniChar *chPtr = &ch;
@@ -499,7 +498,7 @@ Tcl_NumUtfChars(
*/
i = 0;
- if (length == TCL_STRLEN) {
+ if (length < 0) {
while (*src != '\0') {
src += TclUtfToUniChar(src, chPtr);
i++;
@@ -541,9 +540,9 @@ Tcl_NumUtfChars(
*---------------------------------------------------------------------------
*/
-const char *
+CONST char *
Tcl_UtfFindFirst(
- const char *src, /* The UTF-8 string to be searched. */
+ CONST char *src, /* The UTF-8 string to be searched. */
int ch) /* The Tcl_UniChar to search for. */
{
int len;
@@ -580,14 +579,14 @@ Tcl_UtfFindFirst(
*---------------------------------------------------------------------------
*/
-const char *
+CONST char *
Tcl_UtfFindLast(
- const char *src, /* The UTF-8 string to be searched. */
+ CONST char *src, /* The UTF-8 string to be searched. */
int ch) /* The Tcl_UniChar to search for. */
{
int len;
Tcl_UniChar find;
- const char *last;
+ CONST char *last;
last = NULL;
while (1) {
@@ -622,9 +621,9 @@ Tcl_UtfFindLast(
*---------------------------------------------------------------------------
*/
-const char *
+CONST char *
Tcl_UtfNext(
- const char *src) /* The current location in the string. */
+ CONST char *src) /* The current location in the string. */
{
Tcl_UniChar ch;
@@ -652,13 +651,13 @@ Tcl_UtfNext(
*---------------------------------------------------------------------------
*/
-const char *
+CONST char *
Tcl_UtfPrev(
- const char *src, /* The current location in the string. */
- const char *start) /* Pointer to the beginning of the string, to
+ CONST char *src, /* The current location in the string. */
+ CONST char *start) /* Pointer to the beginning of the string, to
* avoid going backwards too far. */
{
- const char *look;
+ CONST char *look;
int i, byte;
src--;
@@ -701,10 +700,10 @@ Tcl_UtfPrev(
Tcl_UniChar
Tcl_UniCharAtIndex(
- register const char *src, /* The UTF-8 string to dereference. */
- register size_t index) /* The position of the desired character. */
+ register CONST char *src, /* The UTF-8 string to dereference. */
+ register int index) /* The position of the desired character. */
{
- Tcl_UniChar ch = 0;
+ Tcl_UniChar ch;
while (index >= 0) {
index--;
@@ -730,10 +729,10 @@ Tcl_UniCharAtIndex(
*---------------------------------------------------------------------------
*/
-const char *
+CONST char *
Tcl_UtfAtIndex(
- register const char *src, /* The UTF-8 string. */
- register size_t index) /* The position of the desired character. */
+ register CONST char *src, /* The UTF-8 string. */
+ register int index) /* The position of the desired character. */
{
Tcl_UniChar ch;
@@ -770,17 +769,18 @@ Tcl_UtfAtIndex(
*---------------------------------------------------------------------------
*/
-size_t
+int
Tcl_UtfBackslash(
- const char *src, /* Points to the backslash character of a
+ CONST char *src, /* Points to the backslash character of a
* backslash sequence. */
- size_t *readPtr, /* Fill in with number of characters read from
+ int *readPtr, /* Fill in with number of characters read from
* src, unless NULL. */
char *dst) /* Filled with the bytes represented by the
* backslash sequence. */
{
#define LINE_LENGTH 128
- size_t numRead, result;
+ int numRead;
+ int result;
result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst);
if (numRead == LINE_LENGTH) {
@@ -820,7 +820,7 @@ Tcl_UtfToUpper(
{
Tcl_UniChar ch, upChar;
char *src, *dst;
- size_t bytes;
+ int bytes;
/*
* Iterate over the string until we hit the terminating null.
@@ -838,7 +838,7 @@ Tcl_UtfToUpper(
*/
if (bytes < UtfCount(upChar)) {
- memcpy(dst, src, bytes);
+ memcpy(dst, src, (size_t) bytes);
dst += bytes;
} else {
dst += Tcl_UniCharToUtf(upChar, dst);
@@ -873,7 +873,7 @@ Tcl_UtfToLower(
{
Tcl_UniChar ch, lowChar;
char *src, *dst;
- size_t bytes;
+ int bytes;
/*
* Iterate over the string until we hit the terminating null.
@@ -891,7 +891,7 @@ Tcl_UtfToLower(
*/
if (bytes < UtfCount(lowChar)) {
- memcpy(dst, src, bytes);
+ memcpy(dst, src, (size_t) bytes);
dst += bytes;
} else {
dst += Tcl_UniCharToUtf(lowChar, dst);
@@ -927,7 +927,7 @@ Tcl_UtfToTitle(
{
Tcl_UniChar ch, titleChar, lowChar;
char *src, *dst;
- size_t bytes;
+ int bytes;
/*
* Capitalize the first character and then lowercase the rest of the
@@ -941,7 +941,7 @@ Tcl_UtfToTitle(
titleChar = Tcl_UniCharToTitle(ch);
if (bytes < UtfCount(titleChar)) {
- memcpy(dst, src, bytes);
+ memcpy(dst, src, (size_t) bytes);
dst += bytes;
} else {
dst += Tcl_UniCharToUtf(titleChar, dst);
@@ -953,7 +953,7 @@ Tcl_UtfToTitle(
lowChar = Tcl_UniCharToLower(ch);
if (bytes < UtfCount(lowChar)) {
- memcpy(dst, src, bytes);
+ memcpy(dst, src, (size_t) bytes);
dst += bytes;
} else {
dst += Tcl_UniCharToUtf(lowChar, dst);
@@ -983,9 +983,9 @@ Tcl_UtfToTitle(
int
TclpUtfNcmp2(
- const char *cs, /* UTF string to compare to ct. */
- const char *ct, /* UTF string cs is compared to. */
- size_t numBytes) /* Number of *bytes* to compare. */
+ CONST char *cs, /* UTF string to compare to ct. */
+ CONST char *ct, /* UTF string cs is compared to. */
+ unsigned long numBytes) /* Number of *bytes* to compare. */
{
/*
* We can't simply call 'memcmp(cs, ct, numBytes);' because we need to
@@ -1030,9 +1030,9 @@ TclpUtfNcmp2(
int
Tcl_UtfNcmp(
- const char *cs, /* UTF string to compare to ct. */
- const char *ct, /* UTF string cs is compared to. */
- size_t numChars) /* Number of UTF chars to compare. */
+ CONST char *cs, /* UTF string to compare to ct. */
+ CONST char *ct, /* UTF string cs is compared to. */
+ unsigned long numChars) /* Number of UTF chars to compare. */
{
Tcl_UniChar ch1, ch2;
@@ -1078,9 +1078,9 @@ Tcl_UtfNcmp(
int
Tcl_UtfNcasecmp(
- const char *cs, /* UTF string to compare to ct. */
- const char *ct, /* UTF string cs is compared to. */
- size_t numChars) /* Number of UTF chars to compare. */
+ CONST char *cs, /* UTF string to compare to ct. */
+ CONST char *ct, /* UTF string cs is compared to. */
+ unsigned long numChars) /* Number of UTF chars to compare. */
{
Tcl_UniChar ch1, ch2;
while (numChars-- > 0) {
@@ -1105,6 +1105,46 @@ Tcl_UtfNcasecmp(
/*
*----------------------------------------------------------------------
*
+ * TclUtfCasecmp --
+ *
+ * Compare UTF chars of string cs to string ct case insensitively.
+ * Replacement for strcasecmp in Tcl core, in places where UTF-8 should
+ * be handled.
+ *
+ * Results:
+ * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int /* <0, 0 or >0 according to the case-insensitive order of cs and ct. */
+TclUtfCasecmp(
+ CONST char *cs, /* First NUL-terminated UTF-8 string. */
+ CONST char *ct) /* Second NUL-terminated UTF-8 string, compared against cs. */
+{
+ while (*cs && *ct) { /* Run until either string reaches its NUL byte. */
+ Tcl_UniChar ch1, ch2;
+
+ cs += TclUtfToUniChar(cs, &ch1); /* Decode one character, advance by its byte length. */
+ ct += TclUtfToUniChar(ct, &ch2); /* Same for the second string. */
+ if (ch1 != ch2) { /* Only fold case when the raw characters differ. */
+ ch1 = Tcl_UniCharToLower(ch1);
+ ch2 = Tcl_UniCharToLower(ch2);
+ if (ch1 != ch2) { /* Still different after lowercasing. */
+ return ch1 - ch2; /* Sign of the difference gives the ordering. */
+ }
+ }
+ }
+ return UCHAR(*cs) - UCHAR(*ct); /* At least one byte is NUL here: 0 if both ended, else the string that ended first sorts first. */
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharToUpper --
*
* Compute the uppercase equivalent of the given Unicode character.
@@ -1212,7 +1252,7 @@ Tcl_UniCharToTitle(
int
Tcl_UniCharLen(
- const Tcl_UniChar *uniStr) /* Unicode string to find length of. */
+ CONST Tcl_UniChar *uniStr) /* Unicode string to find length of. */
{
int len = 0;
@@ -1242,9 +1282,9 @@ Tcl_UniCharLen(
int
Tcl_UniCharNcmp(
- const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
- const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
- size_t numChars) /* Number of unichars to compare. */
+ CONST Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
+ CONST Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
+ unsigned long numChars) /* Number of unichars to compare. */
{
#ifdef WORDS_BIGENDIAN
/*
@@ -1287,9 +1327,9 @@ Tcl_UniCharNcmp(
int
Tcl_UniCharNcasecmp(
- const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
- const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
- size_t numChars) /* Number of unichars to compare. */
+ CONST Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
+ CONST Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
+ unsigned long numChars) /* Number of unichars to compare. */
{
for ( ; numChars != 0; numChars--, ucs++, uct++) {
if (*ucs != *uct) {
@@ -1514,9 +1554,8 @@ Tcl_UniCharIsSpace(
*/
if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) {
- return isspace(UCHAR(ch)); /* INTL: ISO space */
- } else if ((Tcl_UniChar) ch == 0x0085 || (Tcl_UniChar) ch == 0x200b
- || (Tcl_UniChar) ch == 0x2060 || (Tcl_UniChar) ch == 0xfeff) {
+ return TclIsSpaceProc((char) ch);
+ } else if ((Tcl_UniChar) ch == 0x180e || (Tcl_UniChar) ch == 0x202f) {
return 1;
} else {
return ((SPACE_BITS >> GetCategory(ch)) & 1);
@@ -1594,8 +1633,8 @@ Tcl_UniCharIsWordChar(
int
Tcl_UniCharCaseMatch(
- const Tcl_UniChar *uniStr, /* Unicode String. */
- const Tcl_UniChar *uniPattern,
+ CONST Tcl_UniChar *uniStr, /* Unicode String. */
+ CONST Tcl_UniChar *uniPattern,
/* Pattern, which may contain special
* characters. */
int nocase) /* 0 for case sensitive, 1 for insensitive */
@@ -1782,14 +1821,14 @@ Tcl_UniCharCaseMatch(
int
TclUniCharMatch(
- const Tcl_UniChar *string, /* Unicode String. */
- size_t strLen, /* Length of String */
- const Tcl_UniChar *pattern, /* Pattern, which may contain special
+ CONST Tcl_UniChar *string, /* Unicode String. */
+ int strLen, /* Length of String */
+ CONST Tcl_UniChar *pattern, /* Pattern, which may contain special
* characters. */
- size_t ptnLen, /* Length of Pattern */
+ int ptnLen, /* Length of Pattern */
int nocase) /* 0 for case sensitive, 1 for insensitive */
{
- const Tcl_UniChar *stringEnd, *patternEnd;
+ CONST Tcl_UniChar *stringEnd, *patternEnd;
Tcl_UniChar p;
stringEnd = string + strLen;