diff options
author | dgp <dgp@users.sourceforge.net> | 2020-05-06 19:48:34 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2020-05-06 19:48:34 (GMT) |
commit | 289b43aefdea045540bf95afbd07b55ec9c8a16e (patch) | |
tree | a71231c3acbb0fc376aa0e2344dd0ec00d944ea5 /generic | |
parent | 1a11b74770f279cecda0b4a31214558b9a78c229 (diff) | |
parent | 50fa02d30ebf14022416a55dc235e7e40d3c146b (diff) | |
download | tcl-289b43aefdea045540bf95afbd07b55ec9c8a16e.zip tcl-289b43aefdea045540bf95afbd07b55ec9c8a16e.tar.gz tcl-289b43aefdea045540bf95afbd07b55ec9c8a16e.tar.bz2 |
merge 8.6
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclUtf.c | 37 |
1 files changed, 26 insertions, 11 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index ac6f0d8..0d78f03 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -134,15 +134,23 @@ TclUtfCount(
  *
  * Invalid --
  *
- * Utility routine to report whether /src/ points to the start of an
- * invald byte sequence that should be rejected. This might be because
- * it is an overlong encoding, or because it encodes something out of
- * the proper range. Caller guarantees that src[0] and src[1] are
- * readable, and
+ * Given a pointer to a two-byte prefix of a well-formed UTF-8 byte
+ * sequence (a lead byte followed by a trail byte) this routine
+ * examines those two bytes to determine whether the sequence is
+ * invalid in UTF-8. This might be because it is an overlong
+ * encoding, or because it encodes something out of the proper range.
  *
- * (src[0] >= 0xC0) && (src[0] != 0xC1)
- * (src[1] >= 0x80) && (src[1] < 0xC0)
- * (src[0] < ((TCL_UTF_MAX > 3) ? 0xF5 : 0xF0))
+ * Given a pointer to the bytes \xF8 or \xFC , this routine will
+ * try to read beyond the end of the "bounds" table. Callers must
+ * prevent this.
+ *
+ * Given a pointer to something else (an ASCII byte, a trail byte,
+ * or another byte that can never begin a valid byte sequence such
+ * as \xF5) this routine returns false. That makes the routine poorly
+ * named, as it does not detect and report all invalid sequences.
+ *
+ * Callers have to take care that this routine does something useful
+ * for their needs.
  *
  * Results:
  * A boolean.
@@ -945,9 +953,6 @@ Tcl_UtfNext(
         return src;
     }
 
-    if (Invalid(src)) {
-        return src + 1;
-    }
     left = totalBytes[UCHAR(*src)];
     next = src + 1;
     while (--left) {
@@ -961,6 +966,16 @@ Tcl_UtfNext(
         }
         next++;
     }
+    /*
+     * Call Invalid() here only if required conditions are met:
+     *    src[0] is known a lead byte.
+     *    src[1] is known a trail byte.
+     * Especially important to prevent calls when src[0] == '\xF8' or '\xFC'
+     * See tests utf-6.37 through utf-6.43 through valgrind or similar tool.
+     */
+    if ((next == src + 1) || Invalid(src)) {
+        return src + 1;
+    }
     return next;
 }
 