diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-07 10:56:39 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-07 10:56:39 (GMT) |
| commit | 062c99cefca5d0755a750c751d3e7b452294878e (patch) | |
| tree | 3960d4170c5e6f1bca1caa1baed5c877f2b49d04 | |
| parent | 402b2af43a54b47748174e1ff5b246fab3d61c60 (diff) | |
| parent | fb50148fcc9023ca550a1017a17545ae9469699d (diff) | |
| download | tcl-062c99cefca5d0755a750c751d3e7b452294878e.zip tcl-062c99cefca5d0755a750c751d3e7b452294878e.tar.gz tcl-062c99cefca5d0755a750c751d3e7b452294878e.tar.bz2 | |
Merge 8.6
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | generic/tclInt.h | 3 |
| -rw-r--r-- | generic/tclUtf.c | 74 |
2 files changed, 43 insertions, 34 deletions
diff --git a/generic/tclInt.h b/generic/tclInt.h index 9ef1065..78d9f93 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3253,10 +3253,13 @@ MODULE_SCOPE int TclUtfCount(int ch); #if TCL_UTF_MAX > 3 # define TclUtfToUCS4 Tcl_UtfToUniChar # define TclUCS4Complete Tcl_UtfCharComplete +# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ + ? ((length) >= 3) : Tcl_UtfCharComplete((src), (length))) #else MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); # define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length))) +# define TclChar16Complete Tcl_UtfCharComplete #endif MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 3cf64cf..7c3416c 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -539,7 +539,7 @@ Tcl_UtfToChar16( * Unroll 1 to 4 byte UTF-8 sequences. */ - byte = *((unsigned char *) src); + byte = UCHAR(*src); if (byte < 0xC0) { /* * Handles properly formed UTF-8 characters between 0x01 and 0x7F. @@ -656,8 +656,12 @@ Tcl_UtfToUniCharDString( * DString. */ { int ch = 0, *w, *wString; - const char *p, *end; + const char *p; int oldLength; + /* Pointer to the end of string. 
Never read endPtr[0] */ + const char *endPtr = src + length; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - TCL_UTF_MAX; if (src == NULL) { return NULL; @@ -679,20 +683,19 @@ Tcl_UtfToUniCharDString( w = wString; p = src; - end = src + length - 4; - while (p < end) { - p += Tcl_UtfToUniChar(p, &ch); + endPtr = src + length; + optPtr = endPtr - 4; + while (p <= optPtr) { + p += TclUtfToUCS4(p, &ch); *w++ = ch; } - end += 4; - while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { - p += Tcl_UtfToUniChar(p, &ch); - } else { - ch = UCHAR(*p++); - } + while ((p < endPtr) && TclUCS4Complete(p, endPtr-p)) { + p += TclUtfToUCS4(p, &ch); *w++ = ch; } + while (p < endPtr) { + *w++ = UCHAR(*p++); + } *w = '\0'; Tcl_DStringSetLength(dsPtr, oldLength + ((char *) w - (char *) wString)); @@ -709,10 +712,13 @@ Tcl_UtfToChar16DString( * appended to this previously initialized * DString. */ { - unsigned short ch = 0; - unsigned short *w, *wString; - const char *p, *end; + unsigned short ch = 0, *w, *wString; + const char *p; int oldLength; + /* Pointer to the end of string. 
Never read endPtr[0] */ + const char *endPtr = src + length; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - TCL_UTF_MAX; if (src == NULL) { return NULL; @@ -734,26 +740,26 @@ Tcl_UtfToChar16DString( w = wString; p = src; - end = src + length - 4; - while (p < end) { + endPtr = src + length; + optPtr = endPtr - 3; + while (p <= optPtr) { p += Tcl_UtfToChar16(p, &ch); *w++ = ch; } - end += 4; - while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { - p += Tcl_UtfToChar16(p, &ch); - } else { - ch = UCHAR(*p++); - } + while ((p < endPtr) && TclChar16Complete(p, endPtr-p)) { + p += Tcl_UtfToChar16(p, &ch); *w++ = ch; } + while (p < endPtr) { + *w++ = UCHAR(*p++); + } *w = '\0'; Tcl_DStringSetLength(dsPtr, oldLength + ((char *) w - (char *) wString)); return wString; } + /* *--------------------------------------------------------------------------- * @@ -820,17 +826,17 @@ Tcl_NumUtfChars( /* Pointer to the end of string. Never read endPtr[0] */ const char *endPtr = src + length; - /* Pointer to breakpoint in scan where optimization is lost */ - const char *optPtr = endPtr - TCL_UTF_MAX + 1; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - TCL_UTF_MAX; /* * Optimize away the call in this loop. Justified because... - * when (src < optPtr), (endPtr - src) > (endPtr - optPtr) - * By initialization above (endPtr - optPtr) = TCL_UTF_MAX - 1 + * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr) + * By initialization above (endPtr - optPtr) = TCL_UTF_MAX * So (endPtr - src) >= TCL_UTF_MAX, and passing that to * Tcl_UtfCharComplete we know will cause return of 1. */ - while ((src < optPtr) + while (src <= optPtr /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) { src += TclUtfToUniChar(src, &ch); i++; @@ -876,9 +882,9 @@ Tcl_UtfFindFirst( int ch) /* The Unicode character to search for. 
*/ { while (1) { - int ucs4, len = TclUtfToUCS4(src, &ucs4); + int find, len = TclUtfToUCS4(src, &find); - if (ucs4 == ch) { + if (find == ch) { return src; } if (*src == '\0') { @@ -915,9 +921,9 @@ Tcl_UtfFindLast( const char *last = NULL; while (1) { - int ucs4, len = TclUtfToUCS4(src, &ucs4); + int find, len = TclUtfToUCS4(src, &find); - if (ucs4 == ch) { + if (find == ch) { last = src; } if (*src == '\0') { @@ -1065,7 +1071,7 @@ Tcl_UtfPrev( /* * trailBytesSeen > 0, so we can examine look[1] safely. - * Use that capability to screen out overlong sequences. + * Use that capability to screen out invalid sequences. */ if (Invalid(look)) { |
