summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclUtf.c 49
1 file changed, 26 insertions, 23 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 43958af..1d5cff5 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -376,7 +376,7 @@ Tcl_UtfToUniChar(
* Unroll 1 to 3 (or 4) byte UTF-8 sequences.
*/
- byte = *((unsigned char *) src);
+ byte = UCHAR(*src);
if (byte < 0xC0) {
/*
* Handles properly formed UTF-8 characters between 0x01 and 0x7F.
@@ -498,8 +498,12 @@ Tcl_UtfToUniCharDString(
* DString. */
{
Tcl_UniChar ch = 0, *w, *wString;
- const char *p, *end;
+ const char *p;
int oldLength;
+ /* Pointer to the end of string. Never read endPtr[0] */
+ const char *endPtr = src + length;
+ /* Pointer to last byte where optimization still can be used */
+ const char *optPtr = endPtr - TCL_UTF_MAX;
if (length < 0) {
length = strlen(src);
@@ -513,28 +517,27 @@ Tcl_UtfToUniCharDString(
oldLength = Tcl_DStringLength(dsPtr);
Tcl_DStringSetLength(dsPtr,
- oldLength + (int) ((length + 1) * sizeof(Tcl_UniChar)));
+ oldLength + ((length + 1) * sizeof(Tcl_UniChar)));
wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);
w = wString;
p = src;
- end = src + length - TCL_UTF_MAX;
- while (p < end) {
+ endPtr = src + length;
+ optPtr = endPtr - TCL_UTF_MAX;
+ while (p <= optPtr) {
p += TclUtfToUniChar(p, &ch);
*w++ = ch;
}
- end += TCL_UTF_MAX;
- while (p < end) {
- if (Tcl_UtfCharComplete(p, end-p)) {
- p += TclUtfToUniChar(p, &ch);
- } else {
- ch = UCHAR(*p++);
- }
+ while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) {
+ p += TclUtfToUniChar(p, &ch);
*w++ = ch;
}
+ while (p < endPtr) {
+ *w++ = UCHAR(*p++);
+ }
*w = '\0';
Tcl_DStringSetLength(dsPtr,
- (oldLength + ((char *) w - (char *) wString)));
+ oldLength + ((char *) w - (char *) wString));
return wString;
}
@@ -605,17 +608,17 @@ Tcl_NumUtfChars(
/* Pointer to the end of string. Never read endPtr[0] */
const char *endPtr = src + length;
- /* Pointer to breakpoint in scan where optimization is lost */
- const char *optPtr = endPtr - TCL_UTF_MAX + 1;
+ /* Pointer to last byte where optimization still can be used */
+ const char *optPtr = endPtr - TCL_UTF_MAX;
/*
* Optimize away the call in this loop. Justified because...
- * when (src < optPtr), (endPtr - src) > (endPtr - optPtr)
- * By initialization above (endPtr - optPtr) = TCL_UTF_MAX - 1
+ * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr)
+ * By initialization above (endPtr - optPtr) = TCL_UTF_MAX
* So (endPtr - src) >= TCL_UTF_MAX, and passing that to
* Tcl_UtfCharComplete we know will cause return of 1.
*/
- while ((src < optPtr)
+ while (src <= optPtr
/* && Tcl_UtfCharComplete(src, endPtr - src) */ ) {
#if TCL_UTF_MAX < 4
if (((unsigned)UCHAR(*src) - 0xF0) < 5) {
@@ -675,9 +678,9 @@ Tcl_UtfFindFirst(
int ch) /* The Unicode character to search for. */
{
while (1) {
- int ucs4, len = TclUtfToUCS4(src, &ucs4);
+ int find, len = TclUtfToUCS4(src, &find);
- if (ucs4 == ch) {
+ if (find == ch) {
return src;
}
if (*src == '\0') {
@@ -714,9 +717,9 @@ Tcl_UtfFindLast(
const char *last = NULL;
while (1) {
- int ucs4, len = TclUtfToUCS4(src, &ucs4);
+ int find, len = TclUtfToUCS4(src, &find);
- if (ucs4 == ch) {
+ if (find == ch) {
last = src;
}
if (*src == '\0') {
@@ -857,7 +860,7 @@ Tcl_UtfPrev(
/*
* trailBytesSeen > 0, so we can examine look[1] safely.
- * Use that capability to screen out overlong sequences.
+ * Use that capability to screen out invalid sequences.
*/
if (Invalid(look)) {