From 6c9fa195b08a24ca759c7d0e2129a33137bbbccd Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 6 May 2020 13:03:29 +0000 Subject: Change Invalid() parameter type to "const char *". Also call Invalid() first in Tcl_UtfNext(), so if src[1] is invalid src[2] doesn't need to be checked any more. Note: This order change, calling Invalid() first, was wrong, and is corrected in later commits. Thanks, Don, for noticing this! --- generic/tclUtf.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 528d5de..60e475a 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -81,7 +81,7 @@ static CONST unsigned char totalBytes[256] = { */ static int UtfCount(int ch); -static int Invalid(unsigned char *src); +static int Invalid(const char *src); /* *--------------------------------------------------------------------------- @@ -154,9 +154,9 @@ static CONST unsigned char bounds[28] = { INLINE static int Invalid( - unsigned char *src) /* Points to lead byte of a UTF-8 byte sequence */ + const char *src) /* Points to lead byte of a UTF-8 byte sequence */ { - unsigned char byte = *src; + unsigned char byte = UCHAR(*src); int index; if ((byte & 0xC3) != 0xC0) { @@ -164,7 +164,7 @@ Invalid( return 0; } index = (byte - 0xC0) >> 1; - if (src[1] < bounds[index] || src[1] > bounds[index+1]) { + if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) { /* Out of bounds - report invalid. */ return 1; } @@ -648,9 +648,14 @@ CONST char * Tcl_UtfNext( CONST char *src) /* The current location in the string. 
*/ { - int left = totalBytes[UCHAR(*src)]; - const char *next = src + 1; + int left; + const char *next; + if (Invalid(src)) { + return src + 1; + } + left = totalBytes[UCHAR(*src)]; + next = src + 1; while (--left) { if ((*next & 0xC0) != 0x80) { /* @@ -662,9 +667,6 @@ Tcl_UtfNext( } next++; } - if (Invalid((unsigned char *)src)) { - return src + 1; - } return next; } @@ -699,7 +701,7 @@ Tcl_UtfPrev( /* If we cannot find a lead byte that might * start a prefix of a valid UTF byte sequence, * we will fallback to a one-byte back step */ - unsigned char *look = (unsigned char *)fallback; + const char *look = fallback; /* Start search at the fallback position */ /* Quick boundary case exit. */ @@ -708,7 +710,7 @@ Tcl_UtfPrev( } do { - unsigned char byte = look[0]; + unsigned char byte = UCHAR(look[0]); if (byte < 0x80) { /* -- cgit v0.12