2 files changed, 29 insertions, 31 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 1ba474e..aa949ca 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -716,31 +716,11 @@ Tcl_UtfFindLast(
  *
  * Tcl_UtfNext --
  *
- *	The aim of this routine is to provide a way to iterate forward
- *	through a UTF-8 string. The caller is expected to pass a non-NULL
- *	pointer argument /src/ which points to a location within a string.
- *	(*src) will be read, so /src/ must not point to an unreadable
- *	location past the end of the string. If /src/ points to the
- *	beginning of a complete, well-formed and valid UTF_8 byte sequence
- *	of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer
- *	just past the end of that sequence. In any other circumstance,
- *	Tcl_UtfNext returns /src/+1.
- *
- *	Because this routine always returns a value > /src/, it is useful
- *	as a forward iterator that will always make progress. If the string
- *	is NUL-terminated, Tcl_UtfNext will not read beyond the terminating
- *	NUL character. If it is not NUL-terminated, the caller must make
- *	use of the companion routine Tcl_UtfCharComplete to test whether
- *	there is risk that Tcl_UtfNext will read beyond the end of the string.
- *	Tcl_UtfNext will never read more than TCL_UTF_MAX bytes.
- *
- *	In a string where all characters are complete and properly formed,
- *	and /src/ points to the first byte of a character, repeated
- *	Tcl_UtfNext calls will step to the starting bytes of characters, one
- *	character at a time. Within those limitations, Tcl_UtfPrev and
- *	Tcl_UtfNext are inverses. If either condition cannot be met,
- *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
- *	caller will have to take greater care.
+ *	Given a pointer to some location in a UTF-8 string, Tcl_UtfNext
+ *	returns a pointer to the start of the next UTF-8 character. If src
+ *	points at a trail byte, up to three consecutive trail bytes starting
+ *	there are skipped. The caller must not ask for the next character
+ *	after the last character of a string that is not null-terminated.
  *
  * Results:
  *	A pointer to the start of the next character in the string (or to
@@ -760,13 +740,19 @@ Tcl_UtfNext(
     int left = totalBytes[byte];
     const char *next = src + 1;
 
+    if (((*src) & 0xC0) == 0x80) {
+	if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) {
+	    ++src;
+	}
+	return src;
+    }
+
     while (--left) {
 	byte = *((unsigned char *) next);
 	if ((byte & 0xC0) != 0x80) {
 	    /*
 	     * src points to non-trail byte; We ran out of trail bytes
 	     * before the needs of the lead byte were satisfied.
-	     * Let the (malformed) lead byte alone be a character
 	     */
 	    return src + 1;
 	}
diff --git a/tests/utf.test b/tests/utf.test
index 0ba2b85..f56fabc 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -189,7 +189,7 @@ test utf-6.10 {Tcl_UtfNext} testutfnext {
 } 1
 test utf-6.11 {Tcl_UtfNext} testutfnext {
     testutfnext \xA0\xA0
-} 1
+} 2
 test utf-6.12 {Tcl_UtfNext} testutfnext {
     testutfnext \xA0\xD0
 } 1
@@ -420,18 +420,30 @@ test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} {
 } 4
 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
     testutfnext \xA0\xA0
-} 1
+} 2
 test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
     testutfnext \xE8\xA0\xA0 1
-} 2
+} 3
 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \x80\x80
-} 1
+} 2
 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
     testutfnext \xF0\x80\x80 1
-} 2
+} 3
 
 testConstraint testutfprev [llength [info commands testutfprev]]
+test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext {
+    testutfnext \xA0\xA0\xA0
+} 3
+test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext {
+    testutfnext \xF2\xA0\xA0\xA0 1
+} 4
+test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
+    testutfnext \x80\x80\x80
+} 3
+test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
+    testutfnext \xF0\x80\x80\x80 1
+} 4
 
 test utf-7.1 {Tcl_UtfPrev} testutfprev {
     testutfprev {}