summary refs log tree commit diff stats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-07 10:56:39 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-07 10:56:39 (GMT)
commit 062c99cefca5d0755a750c751d3e7b452294878e (patch)
tree 3960d4170c5e6f1bca1caa1baed5c877f2b49d04 /generic/tclUtf.c
parent 402b2af43a54b47748174e1ff5b246fab3d61c60 (diff)
parent fb50148fcc9023ca550a1017a17545ae9469699d (diff)
download tcl-062c99cefca5d0755a750c751d3e7b452294878e.zip
tcl-062c99cefca5d0755a750c751d3e7b452294878e.tar.gz
tcl-062c99cefca5d0755a750c751d3e7b452294878e.tar.bz2
Merge 8.6
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- generic/tclUtf.c 74
1 files changed, 40 insertions, 34 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 3cf64cf..7c3416c 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -539,7 +539,7 @@ Tcl_UtfToChar16(
* Unroll 1 to 4 byte UTF-8 sequences.
*/
- byte = *((unsigned char *) src);
+ byte = UCHAR(*src);
if (byte < 0xC0) {
/*
* Handles properly formed UTF-8 characters between 0x01 and 0x7F.
@@ -656,8 +656,12 @@ Tcl_UtfToUniCharDString(
* DString. */
{
int ch = 0, *w, *wString;
- const char *p, *end;
+ const char *p;
int oldLength;
+ /* Pointer to the end of string. Never read endPtr[0] */
+ const char *endPtr = src + length;
+ /* Pointer to last byte where optimization still can be used */
+ const char *optPtr = endPtr - TCL_UTF_MAX;
if (src == NULL) {
return NULL;
@@ -679,20 +683,19 @@ Tcl_UtfToUniCharDString(
w = wString;
p = src;
- end = src + length - 4;
- while (p < end) {
- p += Tcl_UtfToUniChar(p, &ch);
+ endPtr = src + length;
+ optPtr = endPtr - 4;
+ while (p <= optPtr) {
+ p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
- end += 4;
- while (p < end) {
- if (Tcl_UtfCharComplete(p, end-p)) {
- p += Tcl_UtfToUniChar(p, &ch);
- } else {
- ch = UCHAR(*p++);
- }
+ while ((p < endPtr) && TclUCS4Complete(p, endPtr-p)) {
+ p += TclUtfToUCS4(p, &ch);
*w++ = ch;
}
+ while (p < endPtr) {
+ *w++ = UCHAR(*p++);
+ }
*w = '\0';
Tcl_DStringSetLength(dsPtr,
oldLength + ((char *) w - (char *) wString));
@@ -709,10 +712,13 @@ Tcl_UtfToChar16DString(
* appended to this previously initialized
* DString. */
{
- unsigned short ch = 0;
- unsigned short *w, *wString;
- const char *p, *end;
+ unsigned short ch = 0, *w, *wString;
+ const char *p;
int oldLength;
+ /* Pointer to the end of string. Never read endPtr[0] */
+ const char *endPtr = src + length;
+ /* Pointer to last byte where optimization still can be used */
+ const char *optPtr = endPtr - TCL_UTF_MAX;
if (src == NULL) {
return NULL;
@@ -734,26 +740,26 @@ Tcl_UtfToChar16DString(
w = wString;
p = src;
- end = src + length - 4;
- while (p < end) {
+ endPtr = src + length;
+ optPtr = endPtr - 3;
+ while (p <= optPtr) {
p += Tcl_UtfToChar16(p, &ch);
*w++ = ch;
}
- end += 4;
- while (p < end) {
- if (Tcl_UtfCharComplete(p, end-p)) {
- p += Tcl_UtfToChar16(p, &ch);
- } else {
- ch = UCHAR(*p++);
- }
+ while ((p < endPtr) && TclChar16Complete(p, endPtr-p)) {
+ p += Tcl_UtfToChar16(p, &ch);
*w++ = ch;
}
+ while (p < endPtr) {
+ *w++ = UCHAR(*p++);
+ }
*w = '\0';
Tcl_DStringSetLength(dsPtr,
oldLength + ((char *) w - (char *) wString));
return wString;
}
+
/*
*---------------------------------------------------------------------------
*
@@ -820,17 +826,17 @@ Tcl_NumUtfChars(
/* Pointer to the end of string. Never read endPtr[0] */
const char *endPtr = src + length;
- /* Pointer to breakpoint in scan where optimization is lost */
- const char *optPtr = endPtr - TCL_UTF_MAX + 1;
+ /* Pointer to last byte where optimization still can be used */
+ const char *optPtr = endPtr - TCL_UTF_MAX;
/*
* Optimize away the call in this loop. Justified because...
- * when (src < optPtr), (endPtr - src) > (endPtr - optPtr)
- * By initialization above (endPtr - optPtr) = TCL_UTF_MAX - 1
+ * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr)
+ * By initialization above (endPtr - optPtr) = TCL_UTF_MAX
* So (endPtr - src) >= TCL_UTF_MAX, and passing that to
* Tcl_UtfCharComplete we know will cause return of 1.
*/
- while ((src < optPtr)
+ while (src <= optPtr
/* && Tcl_UtfCharComplete(src, endPtr - src) */ ) {
src += TclUtfToUniChar(src, &ch);
i++;
@@ -876,9 +882,9 @@ Tcl_UtfFindFirst(
int ch) /* The Unicode character to search for. */
{
while (1) {
- int ucs4, len = TclUtfToUCS4(src, &ucs4);
+ int find, len = TclUtfToUCS4(src, &find);
- if (ucs4 == ch) {
+ if (find == ch) {
return src;
}
if (*src == '\0') {
@@ -915,9 +921,9 @@ Tcl_UtfFindLast(
const char *last = NULL;
while (1) {
- int ucs4, len = TclUtfToUCS4(src, &ucs4);
+ int find, len = TclUtfToUCS4(src, &find);
- if (ucs4 == ch) {
+ if (find == ch) {
last = src;
}
if (*src == '\0') {
@@ -1065,7 +1071,7 @@ Tcl_UtfPrev(
/*
* trailBytesSeen > 0, so we can examine look[1] safely.
- * Use that capability to screen out overlong sequences.
+ * Use that capability to screen out invalid sequences.
*/
if (Invalid(look)) {