summary refs log tree commit diff stats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- generic/tclUtf.c 121
1 files changed, 64 insertions, 57 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 11bde5c..ac76309 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -119,7 +119,7 @@ static int Invalid(const char *src);
*---------------------------------------------------------------------------
*/
-int
+size_t
TclUtfCount(
int ch) /* The Unicode character whose size is returned. */
{
@@ -314,13 +314,13 @@ three:
char *
Tcl_UniCharToUtfDString(
const int *uniStr, /* Unicode string to convert to UTF-8. */
- int uniLength, /* Length of Unicode string. */
+ size_t uniLength, /* Length of Unicode string. */
Tcl_DString *dsPtr) /* UTF-8 representation of string is appended
* to this previously initialized DString. */
{
const int *w, *wEnd;
char *p, *string;
- int oldLength;
+ size_t oldLength;
/*
* UTF-8 string length in bytes will be <= Unicode string length * 4.
@@ -329,7 +329,7 @@ Tcl_UniCharToUtfDString(
if (uniStr == NULL) {
return NULL;
}
- if (uniLength < 0) {
+ if (uniLength == TCL_AUTO_LENGTH) {
uniLength = 0;
w = uniStr;
while (*w != '\0') {
@@ -355,13 +355,14 @@ Tcl_UniCharToUtfDString(
char *
Tcl_Char16ToUtfDString(
const unsigned short *uniStr,/* Utf-16 string to convert to UTF-8. */
- int uniLength, /* Length of Utf-16 string. */
+ size_t uniLength, /* Length of Utf-16 string. */
Tcl_DString *dsPtr) /* UTF-8 representation of string is appended
* to this previously initialized DString. */
{
const unsigned short *w, *wEnd;
char *p, *string;
- int oldLength, len = 1;
+ size_t oldLength;
+ int len = 1;
/*
* UTF-8 string length in bytes will be <= Utf16 string length * 3.
@@ -370,7 +371,7 @@ Tcl_Char16ToUtfDString(
if (uniStr == NULL) {
return NULL;
}
- if (uniLength < 0) {
+ if (uniLength == TCL_AUTO_LENGTH) {
uniLength = 0;
w = uniStr;
@@ -421,7 +422,7 @@ Tcl_Char16ToUtfDString(
* Tcl_UtfCharComplete() before calling this routine to ensure that
* enough bytes remain in the string.
*
- * If TCL_UTF_MAX <= 4, special handling of Surrogate pairs is done:
+ * If TCL_UTF_MAX <= 3, special handling of Surrogate pairs is done:
* For any UTF-8 string containing a character outside of the BMP, the
* first call to this function will fill *chPtr with the high surrogate
* and generate a return value of 1. Calling Tcl_UtfToUniChar again
@@ -654,7 +655,7 @@ Tcl_UtfToChar16(
int *
Tcl_UtfToUniCharDString(
const char *src, /* UTF-8 string to convert to Unicode. */
- int length, /* Length of UTF-8 string in bytes, or -1 for
+ size_t length, /* Length of UTF-8 string in bytes, or TCL_AUTO_LENGTH for
* strlen(). */
Tcl_DString *dsPtr) /* Unicode representation of string is
* appended to this previously initialized
@@ -662,7 +663,7 @@ Tcl_UtfToUniCharDString(
{
int ch = 0, *w, *wString;
const char *p;
- int oldLength;
+ size_t oldLength;
/* Pointer to the end of string. Never read endPtr[0] */
const char *endPtr = src + length;
/* Pointer to last byte where optimization still can be used */
@@ -671,7 +672,7 @@ Tcl_UtfToUniCharDString(
if (src == NULL) {
return NULL;
}
- if (length < 0) {
+ if (length == TCL_AUTO_LENGTH) {
length = strlen(src);
}
@@ -711,7 +712,7 @@ Tcl_UtfToUniCharDString(
unsigned short *
Tcl_UtfToChar16DString(
const char *src, /* UTF-8 string to convert to Unicode. */
- int length, /* Length of UTF-8 string in bytes, or -1 for
+ size_t length, /* Length of UTF-8 string in bytes, or TCL_AUTO_LENGTH for
* strlen(). */
Tcl_DString *dsPtr) /* Unicode representation of string is
* appended to this previously initialized
@@ -719,7 +720,7 @@ Tcl_UtfToChar16DString(
{
unsigned short ch = 0, *w, *wString;
const char *p;
- int oldLength;
+ size_t oldLength;
/* Pointer to the end of string. Never read endPtr[0] */
const char *endPtr = src + length;
/* Pointer to last byte where optimization still can be used */
@@ -728,12 +729,12 @@ Tcl_UtfToChar16DString(
if (src == NULL) {
return NULL;
}
- if (length < 0) {
+ if (length == TCL_AUTO_LENGTH) {
length = strlen(src);
}
/*
- * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
+ * Unicode string length in WCHARs will be <= UTF-8 string length in
* bytes.
*/
@@ -789,7 +790,7 @@ int
Tcl_UtfCharComplete(
const char *src, /* String to check if first few bytes contain
* a complete UTF-8 character. */
- int length) /* Length of above string in bytes. */
+ size_t length) /* Length of above string in bytes. */
{
return length >= complete[UCHAR(*src)];
}
@@ -812,18 +813,18 @@ Tcl_UtfCharComplete(
*---------------------------------------------------------------------------
*/
-int
+size_t
Tcl_NumUtfChars(
const char *src, /* The UTF-8 string to measure. */
- int length) /* The length of the string in bytes, or -1
- * for strlen(string). */
+ size_t length) /* The length of the string in bytes, or
+ * TCL_AUTO_LENGTH for strlen(src). */
{
Tcl_UniChar ch = 0;
- int i = 0;
+ size_t i = 0;
- if (length < 0) {
+ if (length == TCL_AUTO_LENGTH) {
/* string is NUL-terminated, so TclUtfToUniChar calls are safe. */
- while ((*src != '\0') && (i < INT_MAX)) {
+ while (*src != '\0') {
src += TclUtfToUniChar(src, &ch);
i++;
}
@@ -966,7 +967,7 @@ const char *
Tcl_UtfNext(
const char *src) /* The current location in the string. */
{
- int left;
+ size_t left;
const char *next;
if (((*src) & 0xC0) == 0x80) {
@@ -1140,15 +1141,15 @@ Tcl_UtfPrev(
int
Tcl_UniCharAtIndex(
const char *src, /* The UTF-8 string to dereference. */
- int index) /* The position of the desired character. */
+ size_t index) /* The position of the desired character. */
{
Tcl_UniChar ch = 0;
int i = 0;
- if (index < 0) {
+ if (index == TCL_INDEX_NONE) {
return -1;
}
- while (index-- > 0) {
+ while (index--) {
i = TclUtfToUniChar(src, &ch);
src += i;
}
@@ -1184,21 +1185,28 @@ Tcl_UniCharAtIndex(
const char *
Tcl_UtfAtIndex(
const char *src, /* The UTF-8 string. */
- int index) /* The position of the desired character. */
+ size_t index) /* The position of the desired character. */
{
Tcl_UniChar ch = 0;
- int len = 0;
+#if TCL_UTF_MAX <= 3
+ size_t len = 0;
+#endif
- while (index-- > 0) {
- len = TclUtfToUniChar(src, &ch);
- src += len;
- }
+ if (index != TCL_INDEX_NONE) {
+ while (index--) {
+#if TCL_UTF_MAX <= 3
+ src += (len = TclUtfToUniChar(src, &ch));
+#else
+ src += TclUtfToUniChar(src, &ch);
+#endif
+ }
#if TCL_UTF_MAX <= 3
if ((ch >= 0xD800) && (len < 3)) {
/* Index points at character following high Surrogate */
src += TclUtfToUniChar(src, &ch);
}
#endif
+ }
return src;
}
@@ -1228,7 +1236,7 @@ Tcl_UtfAtIndex(
*---------------------------------------------------------------------------
*/
-int
+size_t
Tcl_UtfBackslash(
const char *src, /* Points to the backslash character of a
* backslash sequence. */
@@ -1238,8 +1246,7 @@ Tcl_UtfBackslash(
* backslash sequence. */
{
#define LINE_LENGTH 128
- int numRead;
- int result;
+ size_t numRead, result;
result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst);
if (numRead == LINE_LENGTH) {
@@ -1279,7 +1286,7 @@ Tcl_UtfToUpper(
{
int ch, upChar;
char *src, *dst;
- int len;
+ size_t len;
/*
* Iterate over the string until we hit the terminating null.
@@ -1332,7 +1339,7 @@ Tcl_UtfToLower(
{
int ch, lowChar;
char *src, *dst;
- int len;
+ size_t len;
/*
* Iterate over the string until we hit the terminating null.
@@ -1386,7 +1393,7 @@ Tcl_UtfToTitle(
{
int ch, titleChar, lowChar;
char *src, *dst;
- int len;
+ size_t len;
/*
* Capitalize the first character and then lowercase the rest of the
@@ -1448,7 +1455,7 @@ int
TclpUtfNcmp2(
const char *cs, /* UTF string to compare to ct. */
const char *ct, /* UTF string cs is compared to. */
- unsigned long numBytes) /* Number of *bytes* to compare. */
+ size_t numBytes) /* Number of *bytes* to compare. */
{
/*
* We can't simply call 'memcmp(cs, ct, numBytes);' because we need to
@@ -1495,7 +1502,7 @@ int
Tcl_UtfNcmp(
const char *cs, /* UTF string to compare to ct. */
const char *ct, /* UTF string cs is compared to. */
- unsigned long numChars) /* Number of UTF chars to compare. */
+ size_t numChars) /* Number of UTF chars to compare. */
{
Tcl_UniChar ch1 = 0, ch2 = 0;
@@ -1553,7 +1560,7 @@ int
Tcl_UtfNcasecmp(
const char *cs, /* UTF string to compare to ct. */
const char *ct, /* UTF string cs is compared to. */
- unsigned long numChars) /* Number of UTF chars to compare. */
+ size_t numChars) /* Number of UTF chars to compare. */
{
Tcl_UniChar ch1 = 0, ch2 = 0;
@@ -1788,7 +1795,7 @@ Tcl_UniCharToTitle(
/*
*----------------------------------------------------------------------
*
- * Tcl_UniCharLen --
+ * TclUniCharLen --
*
* Find the length of a UniChar string. The str input must be null
* terminated.
@@ -1802,11 +1809,11 @@ Tcl_UniCharToTitle(
*----------------------------------------------------------------------
*/
-int
-Tcl_UniCharLen(
+size_t
+TclUniCharLen(
const Tcl_UniChar *uniStr) /* Unicode string to find length of. */
{
- int len = 0;
+ size_t len = 0;
while (*uniStr != '\0') {
len++;
@@ -1818,7 +1825,7 @@ Tcl_UniCharLen(
/*
*----------------------------------------------------------------------
*
- * Tcl_UniCharNcmp --
+ * TclUniCharNcmp --
*
* Compare at most numChars unichars of string ucs to string uct.
* Both ucs and uct are assumed to be at least numChars unichars long.
@@ -1833,10 +1840,10 @@ Tcl_UniCharLen(
*/
int
-Tcl_UniCharNcmp(
+TclUniCharNcmp(
const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
- unsigned long numChars) /* Number of unichars to compare. */
+ size_t numChars) /* Number of unichars to compare. */
{
#ifdef WORDS_BIGENDIAN
/*
@@ -1862,7 +1869,7 @@ Tcl_UniCharNcmp(
/*
*----------------------------------------------------------------------
*
- * Tcl_UniCharNcasecmp --
+ * TclUniCharNcasecmp --
*
* Compare at most numChars unichars of string ucs to string uct case
* insensitive. Both ucs and uct are assumed to be at least numChars
@@ -1878,10 +1885,10 @@ Tcl_UniCharNcmp(
*/
int
-Tcl_UniCharNcasecmp(
+TclUniCharNcasecmp(
const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
- unsigned long numChars) /* Number of unichars to compare. */
+ size_t numChars) /* Number of unichars to compare. */
{
for ( ; numChars != 0; numChars--, ucs++, uct++) {
if (*ucs != *uct) {
@@ -2207,7 +2214,7 @@ Tcl_UniCharIsWordChar(
/*
*----------------------------------------------------------------------
*
- * Tcl_UniCharCaseMatch --
+ * TclUniCharCaseMatch --
*
* See if a particular Unicode string matches a particular pattern.
* Allows case insensitivity. This is the Unicode equivalent of the char*
@@ -2228,7 +2235,7 @@ Tcl_UniCharIsWordChar(
*/
int
-Tcl_UniCharCaseMatch(
+TclUniCharCaseMatch(
const Tcl_UniChar *uniStr, /* Unicode String. */
const Tcl_UniChar *uniPattern,
/* Pattern, which may contain special
@@ -2295,7 +2302,7 @@ Tcl_UniCharCaseMatch(
}
}
}
- if (Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase)) {
+ if (TclUniCharCaseMatch(uniStr, uniPattern, nocase)) {
return 1;
}
if (*uniStr == 0) {
@@ -2401,7 +2408,7 @@ Tcl_UniCharCaseMatch(
*
* See if a particular Unicode string matches a particular pattern.
* Allows case insensitivity. This is the Unicode equivalent of the char*
- * Tcl_StringCaseMatch. This variant of Tcl_UniCharCaseMatch uses counted
+ * Tcl_StringCaseMatch. This variant of TclUniCharCaseMatch uses counted
* Strings, so embedded NULLs are allowed.
*
* Results:
@@ -2418,10 +2425,10 @@ Tcl_UniCharCaseMatch(
int
TclUniCharMatch(
const Tcl_UniChar *string, /* Unicode String. */
- int strLen, /* Length of String */
+ size_t strLen, /* Length of String */
const Tcl_UniChar *pattern, /* Pattern, which may contain special
* characters. */
- int ptnLen, /* Length of Pattern */
+ size_t ptnLen, /* Length of Pattern */
int nocase) /* 0 for case sensitive, 1 for insensitive */
{
const Tcl_UniChar *stringEnd, *patternEnd;