summary refs log tree commit diff stats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2018-10-08 19:00:39 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2018-10-08 19:00:39 (GMT)
commit 1812305cb35d0096d50f9b1e7c1c368f230b6f07 (patch)
tree 29a9a607c030b3cfa6c0ae744822eb76de21ee86 /generic/tclUtf.c
parent fe34611d8ec24bd6dd816cf00fa8ac15355bde79 (diff)
parent 99e9ce3656f195c60ba4e928559c05d47167901c (diff)
download tcl-1812305cb35d0096d50f9b1e7c1c368f230b6f07.zip
tcl-1812305cb35d0096d50f9b1e7c1c368f230b6f07.tar.gz
tcl-1812305cb35d0096d50f9b1e7c1c368f230b6f07.tar.bz2
TIP #494 implementation: More use of size_t in Tcl 9
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- generic/tclUtf.c 79
1 files changed, 43 insertions, 36 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index ce67db7..6b73a8f 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -211,14 +211,15 @@ three:
char *
Tcl_UniCharToUtfDString(
const Tcl_UniChar *uniStr, /* Unicode string to convert to UTF-8. */
- int uniLength, /* Length of Unicode string in Tcl_UniChars
+ size_t uniLength, /* Length of Unicode string in Tcl_UniChars
* (must be >= 0). */
Tcl_DString *dsPtr) /* UTF-8 representation of string is appended
* to this previously initialized DString. */
{
const Tcl_UniChar *w, *wEnd;
char *p, *string;
- int oldLength, len = 1;
+ size_t oldLength;
+ int len = 1;
/*
* UTF-8 string length in bytes will be <= Unicode string length * 4.
@@ -414,7 +415,7 @@ Tcl_UtfToUniChar(
Tcl_UniChar *
Tcl_UtfToUniCharDString(
const char *src, /* UTF-8 string to convert to Unicode. */
- int length, /* Length of UTF-8 string in bytes, or -1 for
+ size_t length, /* Length of UTF-8 string in bytes, or -1 for
* strlen(). */
Tcl_DString *dsPtr) /* Unicode representation of string is
* appended to this previously initialized
@@ -422,9 +423,9 @@ Tcl_UtfToUniCharDString(
{
Tcl_UniChar ch = 0, *w, *wString;
const char *p, *end;
- int oldLength;
+ size_t oldLength;
- if (length < 0) {
+ if (length == TCL_AUTO_LENGTH) {
length = strlen(src);
}
@@ -487,9 +488,9 @@ int
Tcl_UtfCharComplete(
const char *src, /* String to check if first few bytes contain
* a complete UTF-8 character. */
- int length) /* Length of above string in bytes. */
+ size_t length) /* Length of above string in bytes. */
{
- return length >= totalBytes[(unsigned char)*src];
+ return length >= totalBytes[(unsigned char)*src];
}
/*
@@ -510,14 +511,14 @@ Tcl_UtfCharComplete(
*---------------------------------------------------------------------------
*/
-int
+size_t
Tcl_NumUtfChars(
register const char *src, /* The UTF-8 string to measure. */
- int length) /* The length of the string in bytes, or -1
+ size_t length) /* The length of the string in bytes, or -1
* for strlen(string). */
{
Tcl_UniChar ch = 0;
- register int i = 0;
+ register size_t i = 0;
/*
* The separate implementations are faster.
@@ -526,12 +527,11 @@ Tcl_NumUtfChars(
* single-byte char case specially.
*/
- if (length < 0) {
+ if (length == TCL_AUTO_LENGTH) {
while (*src != '\0') {
src += TclUtfToUniChar(src, &ch);
i++;
}
- if (i < 0) i = INT_MAX; /* Bug [2738427] */
} else {
register const char *endPtr = src + length - 4;
@@ -750,7 +750,7 @@ Tcl_UtfPrev(
int
Tcl_UniCharAtIndex(
register const char *src, /* The UTF-8 string to dereference. */
- register int index) /* The position of the desired character. */
+ register size_t index) /* The position of the desired character. */
{
Tcl_UniChar ch = 0;
int fullchar = 0;
@@ -758,7 +758,8 @@ Tcl_UniCharAtIndex(
int len = 1;
#endif
- while (index-- >= 0) {
+ src += TclUtfToUniChar(src, &ch);
+ while (index--) {
#if TCL_UTF_MAX <= 4
src += (len = TclUtfToUniChar(src, &ch));
#else
@@ -798,21 +799,28 @@ Tcl_UniCharAtIndex(
const char *
Tcl_UtfAtIndex(
register const char *src, /* The UTF-8 string. */
- register int index) /* The position of the desired character. */
+ register size_t index) /* The position of the desired character. */
{
Tcl_UniChar ch = 0;
+#if TCL_UTF_MAX <= 4
int len = 1;
+#endif
- while (index-- > 0) {
- len = TclUtfToUniChar(src, &ch);
- src += len;
- }
+ if (index != TCL_AUTO_LENGTH) {
+ while (index--) {
#if TCL_UTF_MAX <= 4
- if (!len) {
- /* Index points at character following High Surrogate */
- src += TclUtfToUniChar(src, &ch);
- }
+ src += (len = TclUtfToUniChar(src, &ch));
+#else
+ src += TclUtfToUniChar(src, &ch);
+#endif
+ }
+#if TCL_UTF_MAX <= 4
+ if (!len) {
+ /* Index points at character following High Surrogate */
+ src += TclUtfToUniChar(src, &ch);
+ }
#endif
+ }
return src;
}
@@ -842,7 +850,7 @@ Tcl_UtfAtIndex(
*---------------------------------------------------------------------------
*/
-int
+size_t
Tcl_UtfBackslash(
const char *src, /* Points to the backslash character of a
* backslash sequence. */
@@ -852,8 +860,7 @@ Tcl_UtfBackslash(
* backslash sequence. */
{
#define LINE_LENGTH 128
- int numRead;
- int result;
+ size_t numRead, result;
result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst);
if (numRead == LINE_LENGTH) {
@@ -861,7 +868,7 @@ Tcl_UtfBackslash(
* We ate a whole line. Pay the price of a strlen()
*/
- result = TclParseBackslash(src, (int)strlen(src), &numRead, dst);
+ result = TclParseBackslash(src, strlen(src), &numRead, dst);
}
if (readPtr != NULL) {
*readPtr = numRead;
@@ -1100,7 +1107,7 @@ int
TclpUtfNcmp2(
const char *cs, /* UTF string to compare to ct. */
const char *ct, /* UTF string cs is compared to. */
- unsigned long numBytes) /* Number of *bytes* to compare. */
+ size_t numBytes) /* Number of *bytes* to compare. */
{
/*
* We can't simply call 'memcmp(cs, ct, numBytes);' because we need to
@@ -1147,7 +1154,7 @@ int
Tcl_UtfNcmp(
const char *cs, /* UTF string to compare to ct. */
const char *ct, /* UTF string cs is compared to. */
- unsigned long numChars) /* Number of UTF chars to compare. */
+ size_t numChars) /* Number of UTF chars to compare. */
{
Tcl_UniChar ch1 = 0, ch2 = 0;
@@ -1205,7 +1212,7 @@ int
Tcl_UtfNcasecmp(
const char *cs, /* UTF string to compare to ct. */
const char *ct, /* UTF string cs is compared to. */
- unsigned long numChars) /* Number of UTF chars to compare. */
+ size_t numChars) /* Number of UTF chars to compare. */
{
Tcl_UniChar ch1 = 0, ch2 = 0;
@@ -1451,11 +1458,11 @@ Tcl_UniCharToTitle(
*----------------------------------------------------------------------
*/
-int
+size_t
Tcl_UniCharLen(
const Tcl_UniChar *uniStr) /* Unicode string to find length of. */
{
- int len = 0;
+ size_t len = 0;
while (*uniStr != '\0') {
len++;
@@ -1485,7 +1492,7 @@ int
Tcl_UniCharNcmp(
const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
- unsigned long numChars) /* Number of unichars to compare. */
+ size_t numChars) /* Number of unichars to compare. */
{
#ifdef WORDS_BIGENDIAN
/*
@@ -1530,7 +1537,7 @@ int
Tcl_UniCharNcasecmp(
const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
- unsigned long numChars) /* Number of unichars to compare. */
+ size_t numChars) /* Number of unichars to compare. */
{
for ( ; numChars != 0; numChars--, ucs++, uct++) {
if (*ucs != *uct) {
@@ -2068,10 +2075,10 @@ Tcl_UniCharCaseMatch(
int
TclUniCharMatch(
const Tcl_UniChar *string, /* Unicode String. */
- int strLen, /* Length of String */
+ size_t strLen, /* Length of String */
const Tcl_UniChar *pattern, /* Pattern, which may contain special
* characters. */
- int ptnLen, /* Length of Pattern */
+ size_t ptnLen, /* Length of Pattern */
int nocase) /* 0 for case sensitive, 1 for insensitive */
{
const Tcl_UniChar *stringEnd, *patternEnd;