diff options
| -rw-r--r-- | generic/tclUtf.c | 18 | ||||
| -rw-r--r-- | tests/utf.test | 22 |
2 files changed, 31 insertions, 9 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 2a04414..b4f760f 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -852,9 +852,11 @@ Tcl_UtfFindLast( * * Tcl_UtfNext -- * - * Given a pointer to some current location in a UTF-8 string, move - * forward one character. The caller must ensure that they are not asking - * for the next character after the last character in the string. + * Given a pointer to some location in a UTF-8 string, Tcl_UtfNext + * returns a pointer to the next UTF-8 character in the string. + * The caller must not ask for the next character after the last + * character in the string if the string is not terminated by a null + * character. * * Results: * The return value is the pointer to the next character in the UTF-8 @@ -871,7 +873,15 @@ Tcl_UtfNext( const char *src) /* The current location in the string. */ { Tcl_UniChar ch = 0; - int len = TclUtfToUniChar(src, &ch); + int len; + + if (((*src) & 0xC0) == 0x80) { + if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { + ++src; + } + return src; + } + len = TclUtfToUniChar(src, &ch); #if TCL_UTF_MAX <= 3 if ((ch >= 0xD800) && (len < 3)) { diff --git a/tests/utf.test b/tests/utf.test index 150e395..f3633bd 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -187,7 +187,7 @@ test utf-6.10 {Tcl_UtfNext} testutfnext { } 1 test utf-6.11 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xA0 -} 1 +} 2 test utf-6.12 {Tcl_UtfNext} testutfnext { testutfnext \xA0\xD0 } 1 @@ -418,22 +418,34 @@ test utf-6.87 {Tcl_UtfNext - overlong sequences} testutfnext { } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { testutfnext \xA0\xA0 -} 1 +} 2 test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} testutfnext { testutfnext \xE8\xA0\xA0 1 -} 2 +} 3 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { testutfnext \x80\x80 -} 1 +} 2 test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} testutfnext { testutfnext \xF0\x80\x80 1 -} 2 +} 3 test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { testutfnext \xF4\x8F\xBF\xBF } 4 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} testutfnext { testutfnext \xF4\x90\x80\x80 } 1 +test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { + testutfnext \xA0\xA0\xA0 +} 3 +test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { + testutfnext \xF2\xA0\xA0\xA0 1 +} 4 +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext \x80\x80\x80 +} 3 +test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { + testutfnext \xF0\x80\x80\x80 1 +} 4 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} |
