diff options
| author | dgp <dgp@users.sourceforge.net> | 2020-04-15 22:39:53 (GMT) |
|---|---|---|
| committer | dgp <dgp@users.sourceforge.net> | 2020-04-15 22:39:53 (GMT) |
| commit | d491ca6385bb8c8e630dea43cc287b5a881233ce (patch) | |
| tree | 3ec5d5777535207c2446b5531cbe98f969ab6afb | |
| parent | 1f6e0698a6ecceec504384a0d228e56b2c1aba42 (diff) | |
| download | tcl-d491ca6385bb8c8e630dea43cc287b5a881233ce.zip tcl-d491ca6385bb8c8e630dea43cc287b5a881233ce.tar.gz tcl-d491ca6385bb8c8e630dea43cc287b5a881233ce.tar.bz2 | |
Refactor the Overlong test into a utility routine.
| -rw-r--r-- | generic/tclUtf.c | 85 |
1 file changed, 56 insertions, 29 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 6003b75..f3b2097 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -81,6 +81,7 @@ static CONST unsigned char totalBytes[256] = { */ static int UtfCount(int ch); +static int Overlong(unsigned char *src); /* *--------------------------------------------------------------------------- @@ -115,7 +116,59 @@ UtfCount( #endif return 3; } + +/* + *--------------------------------------------------------------------------- + * + * Overlong -- + * + * Utility routine to report whether /src/ points to the start of an + * overlong byte sequence that should be rejected. + * + * Results: + * A boolean. + *--------------------------------------------------------------------------- + */ +INLINE static int +Overlong( + unsigned char *src) /* Points to lead byte of a UTF-8 byte + * sequence. Caller guarantees it is safe + * to read src[0] and src[1]. */ +{ + switch (*src) { + case 0xC0: + if (src[1] == 0x80) { + /* Valid sequence: \xC0\x80 for \u0000 */ + return 0; + } + /* Reject overlong: \xC0\x81 - \xC0\xBF */ + return 1; + case 0xC1: + /* Reject overlong: \xC1\x80 - \xC1\xBF */ + return 1; + case 0xE0: + if (src[1] < 0xA0) { + /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */ + return 1; + } + /* Valid sequence: \xE0\xA0\x80 for \u0800 , etc. */ + return 0; +#if TCL_UTF_MAX > 3 + case 0xF0: + if (src[1] < 0x90) { + /* Reject overlong: \xF0\x80\x80\x80 - \xF0\x8F\xBF\xBF */ + return 1; + } + /* Valid sequence: \xF0\x90\x80\x80 for \U10000 , etc. */ + return 0; +#endif + default: + /* All other lead bytes lead only valid sequences */ + return 0; + } +} + /* *--------------------------------------------------------------------------- * @@ -756,37 +809,11 @@ Tcl_UtfPrev( * Use that capability to screen out overlong sequences. 
*/ - switch (byte) { - case 0xC0: - if (look[1] == 0x80) { - /* Valid sequence: \xC0\x80 for \u0000 */ - return (CONST char *)look; - } - /* Reject overlong: \xC0\x81 - \xC0\xBF */ + if (Overlong(look)) { + /* Reject */ return fallback; - case 0xC1: - /* Reject overlong: \xC1\x80 - \xC1\xBF */ - return fallback; - case 0xE0: - if (look[1] < 0xA0) { - /* Reject overlong: \xE0\x80\x80 - \xE0\x9F\xBF */ - return fallback; - } - /* Valid sequence: \xE0\xA0\x80 for \u0800 , etc. */ - return (CONST char *)look; -#if TCL_UTF_MAX > 3 - case 0xF0: - if (look[1] < 0x90) { - /* Reject overlong: \xF0\x80\x80\x80 - \xF0\x8F\xBF\xBF */ - return fallback; - } - /* Valid sequence: \xF0\x90\x80\x80 for \U10000 , etc. */ - return (CONST char *)look; -#endif - default: - /* All other lead bytes lead only valid sequences */ - return (CONST char *)look; } + return (CONST char *)look; } /* We saw a trail byte. */ |
