diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-10 19:28:08 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-10 19:28:08 (GMT) |
commit | 0733f232745ac3cc9a3bd4913bd5ffb8b58378a5 (patch) | |
tree | c2825d5f4f6889b70cc3a789c36b53da74eb2668 /generic | |
parent | 1102f22c5a663ad68838f182b53a44d159ac090d (diff) | |
download | tcl-0733f232745ac3cc9a3bd4913bd5ffb8b58378a5.zip tcl-0733f232745ac3cc9a3bd4913bd5ffb8b58378a5.tar.gz tcl-0733f232745ac3cc9a3bd4913bd5ffb8b58378a5.tar.bz2 |
Tweak Invalid() function: No need for "return 0" twice in the function.
For start bytes F0-F4 when TCL_UTF_MAX=4, Tcl_UtfToUniChar() reads 3 bytes but advances only 1 byte, so Tcl_UtfCharComplete() must make sure 3 bytes are available, not 1. Adapt Tcl_UtfCharComplete() accordingly. No change for TCL_UTF_MAX=3 or TCL_UTF_MAX=6.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclUtf.c | 39 |
1 file changed, 31 insertions, 8 deletions
```diff
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index c0de80a..5e0b2e0 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -81,6 +81,30 @@ static const unsigned char totalBytes[256] = {
     1,1,1,1,1,1,1,1,1,1,1
 };
 
+static const unsigned char complete[256] = {
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+#if TCL_UTF_MAX < 4
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+#else /* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+#endif
+    2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+#if TCL_UTF_MAX > 4
+    4,4,4,4,4,
+#elif TCL_UTF_MAX < 4
+    1,1,1,1,1,
+#else
+    3,3,3,3,3,
+#endif
+    1,1,1,1,1,1,1,1,1,1,1
+};
+
 /*
  * Functions used only in this module.
  */
@@ -174,14 +198,13 @@ Invalid(
     unsigned char byte = UCHAR(*src);
     int index;
 
-    if ((byte & 0xC3) != 0xC0) {
+    if ((byte & 0xC3) == 0xC0) {
         /* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */
-        return 0;
-    }
-    index = (byte - 0xC0) >> 1;
-    if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) {
-        /* Out of bounds - report invalid. */
-        return 1;
+        index = (byte - 0xC0) >> 1;
+        if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) {
+            /* Out of bounds - report invalid. */
+            return 1;
+        }
     }
     return 0;
 }
@@ -568,7 +591,7 @@ Tcl_UtfCharComplete(
                                  * a complete UTF-8 character. */
     int length)                 /* Length of above string in bytes. */
 {
-    return length >= totalBytes[UCHAR(*src)];
+    return length >= complete[UCHAR(*src)];
 }
 
 /*
```