diff options
| -rw-r--r-- | generic/tclUtf.c | 49 |
1 files changed, 26 insertions, 23 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 43958af..1d5cff5 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -376,7 +376,7 @@ Tcl_UtfToUniChar( * Unroll 1 to 3 (or 4) byte UTF-8 sequences. */ - byte = *((unsigned char *) src); + byte = UCHAR(*src); if (byte < 0xC0) { /* * Handles properly formed UTF-8 characters between 0x01 and 0x7F. @@ -498,8 +498,12 @@ Tcl_UtfToUniCharDString( * DString. */ { Tcl_UniChar ch = 0, *w, *wString; - const char *p, *end; + const char *p; int oldLength; + /* Pointer to the end of string. Never read endPtr[0] */ + const char *endPtr = src + length; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - TCL_UTF_MAX; if (length < 0) { length = strlen(src); @@ -513,28 +517,27 @@ Tcl_UtfToUniCharDString( oldLength = Tcl_DStringLength(dsPtr); Tcl_DStringSetLength(dsPtr, - oldLength + (int) ((length + 1) * sizeof(Tcl_UniChar))); + oldLength + ((length + 1) * sizeof(Tcl_UniChar))); wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength); w = wString; p = src; - end = src + length - TCL_UTF_MAX; - while (p < end) { + endPtr = src + length; + optPtr = endPtr - TCL_UTF_MAX; + while (p <= optPtr) { p += TclUtfToUniChar(p, &ch); *w++ = ch; } - end += TCL_UTF_MAX; - while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { - p += TclUtfToUniChar(p, &ch); - } else { - ch = UCHAR(*p++); - } + while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) { + p += TclUtfToUniChar(p, &ch); *w++ = ch; } + while (p < endPtr) { + *w++ = UCHAR(*p++); + } *w = '\0'; Tcl_DStringSetLength(dsPtr, - (oldLength + ((char *) w - (char *) wString))); + oldLength + ((char *) w - (char *) wString)); return wString; } @@ -605,17 +608,17 @@ Tcl_NumUtfChars( /* Pointer to the end of string. Never read endPtr[0] */ const char *endPtr = src + length; - /* Pointer to breakpoint in scan where optimization is lost */ - const char *optPtr = endPtr - TCL_UTF_MAX + 1; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - TCL_UTF_MAX; /* * Optimize away the call in this loop. Justified because... - * when (src < optPtr), (endPtr - src) > (endPtr - optPtr) - * By initialization above (endPtr - optPtr) = TCL_UTF_MAX - 1 + * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr) + * By initialization above (endPtr - optPtr) = TCL_UTF_MAX * So (endPtr - src) >= TCL_UTF_MAX, and passing that to * Tcl_UtfCharComplete we know will cause return of 1. */ - while ((src < optPtr) + while (src <= optPtr /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) { #if TCL_UTF_MAX < 4 if (((unsigned)UCHAR(*src) - 0xF0) < 5) { @@ -675,9 +678,9 @@ Tcl_UtfFindFirst( int ch) /* The Unicode character to search for. */ { while (1) { - int ucs4, len = TclUtfToUCS4(src, &ucs4); + int find, len = TclUtfToUCS4(src, &find); - if (ucs4 == ch) { + if (find == ch) { return src; } if (*src == '\0') { @@ -714,9 +717,9 @@ Tcl_UtfFindLast( const char *last = NULL; while (1) { - int ucs4, len = TclUtfToUCS4(src, &ucs4); + int find, len = TclUtfToUCS4(src, &find); - if (ucs4 == ch) { + if (find == ch) { last = src; } if (*src == '\0') { @@ -857,7 +860,7 @@ Tcl_UtfPrev( /* * trailBytesSeen > 0, so we can examine look[1] safely. - * Use that capability to screen out overlong sequences. + * Use that capability to screen out invalid sequences. */ if (Invalid(look)) { |
