diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2021-03-10 12:55:25 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2021-03-10 12:55:25 (GMT) |
commit | a5a30fa3e2b30971c18a067291c9144bdd22199f (patch) | |
tree | 998f407935686779df6023407243473782819308 /generic/tclUtf.c | |
parent | 764ef9b409175a603666871249e5991dcb7cbd50 (diff) | |
download | tcl-a5a30fa3e2b30971c18a067291c9144bdd22199f.zip tcl-a5a30fa3e2b30971c18a067291c9144bdd22199f.tar.gz tcl-a5a30fa3e2b30971c18a067291c9144bdd22199f.tar.bz2 |
TIP #597 implementation: "string is unicode" and new wtf-8 encoding
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index e096c06..2687a1d 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -2187,6 +2187,36 @@ Tcl_UniCharIsUpper( /* *---------------------------------------------------------------------- * + * Tcl_UniCharIsUnicode -- + * + * Test if a code point is a Unicode character: a value in the range + * U+0000 - U+10FFFF that is not in one of the excluded sets below. + * + * Results: + * Returns non-zero if the code point belongs to the Unicode set, + * 0 otherwise. + * + * Excluded are: + * 1) All characters > U+10FFFF (negative values of ch also fail this + * test because ch is compared as unsigned int; this assumes int is + * at least 32 bits wide) + * 2) Surrogates U+D800 - U+DFFF + * 3) Last 2 characters of each plane, so U+??FFFE and U+??FFFF + * 4) The characters in the range U+FDD0 - U+FDEF + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_UniCharIsUnicode( + int ch) /* Code point to test. */ +{ /* + * The range check must stay first: && short-circuits, so the later + * terms are only evaluated once ch is known to be within + * 0 - 0x10FFFF, and the subtraction below cannot overflow. Masking + * with 0xFFF800 keeps bits 11 - 23, which equal 0xD800 exactly for + * the surrogates; masking with 0xFFFE ignores the lowest bit, so it + * matches both ..FFFE and ..FFFF in every plane; the unsigned + * subtraction folds the two-sided test for U+FDD0 - U+FDEF (32 code + * points) into a single comparison. + */ + return ((unsigned int)ch <= 0x10FFFF) && ((ch & 0xFFF800) != 0xD800) + && ((ch & 0xFFFE) != 0xFFFE) && ((unsigned int)(ch - 0xFDD0) >= 32); +} + +/* + *---------------------------------------------------------------------- + * * Tcl_UniCharIsWordChar -- * * Test if a character is alphanumeric or a connector punctuation mark. |