Merge 8.5. Failing tests need examination and adjustment.

author: dgp <dgp@users.sourceforge.net> 2020-04-24 20:51:14 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2020-04-24 20:51:14 (GMT)
commit: e41fff474338362ada285b42e9da856ba6502903 (patch)
tree: 53f915447a76f507afe28c74a3fe9d78a2069c4a
parent: 9b3252ab93bb1eda4a7f82664832fb03a04b41b9 (diff)
parent: fbfa513c23b05ae5deeaa0ff81ce8045967890c0 (diff)
download: tcl-e41fff474338362ada285b42e9da856ba6502903.zip tcl-e41fff474338362ada285b42e9da856ba6502903.tar.gz tcl-e41fff474338362ada285b42e9da856ba6502903.tar.bz2
3 files changed, 16 insertions, 33 deletions
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index ed4e958..4390282 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -1885,6 +1885,7 @@ ParseLexeme(
 {
     const char *end;
     int scanned;
+    Tcl_UniChar ch;
     Tcl_Obj *literal = NULL;
     unsigned char byte;
 
@@ -2063,13 +2064,13 @@ ParseLexeme(
 
     if (!TclIsBareword(*start) || *start == '_') {
 	if (Tcl_UtfCharComplete(start, numBytes)) {
-	    scanned = TclUtfNext(start) - start;
+	    scanned = Tcl_UtfToUniChar(start, &ch);
 	} else {
 	    char utfBytes[TCL_UTF_MAX];
 
 	    memcpy(utfBytes, start, (size_t) numBytes);
 	    utfBytes[numBytes] = '\0';
-	    scanned = TclUtfNext(utfBytes) - utfBytes;
+	    scanned = Tcl_UtfToUniChar(utfBytes, &ch);
 	}
 	*lexemePtr = INVALID;
 	Tcl_DecrRefCount(literal);
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 96953e2..80a5a83 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -579,7 +579,7 @@ Tcl_NumUtfChars(
     int length)			/* The length of the string in bytes, or -1
 				 * for strlen(string). */
 {
-    const char *next;
+    Tcl_UniChar ch;
     register int i = 0;
 
     /*
@@ -591,35 +591,20 @@ Tcl_NumUtfChars(
 
     if (length < 0) {
 	while ((*src != '\0') && (i < INT_MAX)) {
-	    next = TclUtfNext(src);
-#if TCL_UTF_MAX > 4
+	    src += TclUtfToUniChar(src, &ch);
 	    i++;
-#else
-	    i += 1 + ((next - src) > 3);
-#endif
-	    src = next;
 	}
     } else {
 	register const char *endPtr = src + length - TCL_UTF_MAX;
 
 	while (src < endPtr) {
-	    next = TclUtfNext(src);
-#if TCL_UTF_MAX > 4
+	    src += TclUtfToUniChar(src, &ch);
 	    i++;
-#else
-	    i += 1 + ((next - src) > 3);
-#endif
-	    src = next;
 	}
 	endPtr += TCL_UTF_MAX;
 	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
-	    next = TclUtfNext(src);
-#if TCL_UTF_MAX > 4
+	    src += TclUtfToUniChar(src, &ch);
 	    i++;
-#else
-	    i += 1 + ((next - src) > 3);
-#endif
-	    src = next;
 	}
 	if (src < endPtr) {
 	    i += endPtr - src;
@@ -946,19 +931,10 @@ Tcl_UtfAtIndex(
     register const char *src,	/* The UTF-8 string. */
     register int index)		/* The position of the desired character. */
 {
-    while (index-- > 0) {
-	const char *next = TclUtfNext(src);
+    Tcl_UniChar ch;
 
-#if TCL_UTF_MAX <= 4
-	/*
-	 * 4-byte sequences generate two UCS-2 code units in the
-	 * UTF-16 representation, so in the current indexing scheme
-	 * we need to account for an extra index (total of two).
-	 */
-	index -= ((next - src) > 3);
-#endif
-
-	src = next;
+    while (index-- > 0) {
+	src += TclUtfToUniChar(src, &ch);
     }
     return src;
 }
diff --git a/tests/utf.test b/tests/utf.test
index fc0766d..acdd50e 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -470,6 +470,12 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testu
 test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} {
     testutfnext -bytestring \x80\x80\x80
 } 1
+test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext {
+    testutfnext \xA0\xA0\xA0\xA0
+} 1
+test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext {
+    testutfnext \x80\x80\x80\x80
+} 1
 
 test utf-7.1 {Tcl_UtfPrev} testutfprev {
     testutfprev {}
author	dgp <dgp@users.sourceforge.net>	2020-04-24 20:51:14 (GMT)
committer	dgp <dgp@users.sourceforge.net>	2020-04-24 20:51:14 (GMT)
commit	e41fff474338362ada285b42e9da856ba6502903 (patch)
tree	53f915447a76f507afe28c74a3fe9d78a2069c4a
parent	9b3252ab93bb1eda4a7f82664832fb03a04b41b9 (diff)
parent	fbfa513c23b05ae5deeaa0ff81ce8045967890c0 (diff)
download	tcl-e41fff474338362ada285b42e9da856ba6502903.zip tcl-e41fff474338362ada285b42e9da856ba6502903.tar.gz tcl-e41fff474338362ada285b42e9da856ba6502903.tar.bz2