summaryrefslogtreecommitdiffstats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2021-03-10 12:55:25 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2021-03-10 12:55:25 (GMT)
commita5a30fa3e2b30971c18a067291c9144bdd22199f (patch)
tree998f407935686779df6023407243473782819308 /generic/tclUtf.c
parent764ef9b409175a603666871249e5991dcb7cbd50 (diff)
downloadtcl-a5a30fa3e2b30971c18a067291c9144bdd22199f.zip
tcl-a5a30fa3e2b30971c18a067291c9144bdd22199f.tar.gz
tcl-a5a30fa3e2b30971c18a067291c9144bdd22199f.tar.bz2
TIP #597 implementation: "string is unicode" and new wtf-8 encoding
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r--generic/tclUtf.c30
1 files changed, 30 insertions, 0 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index e096c06..2687a1d 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -2187,6 +2187,36 @@ Tcl_UniCharIsUpper(
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsUnicode --
+ *
+ * Test if a code point is a valid Unicode character: inside the Unicode range and neither a surrogate nor a noncharacter.
+ *
+ * Results:
+ * Returns non-zero if character belongs to the Unicode set.
+ *
+ * Excluded are:
+ * 1) All values outside U+0000 - U+10FFFF (negative values included)
+ * 2) Surrogates U+D800 - U+DFFF
+ * 3) Last 2 characters of each plane, so U+??FFFE and U+??FFFF
+ * 4) The characters in the range U+FDD0 - U+FDEF
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int /* Non-zero when ch passes all four tests in the return expression, zero otherwise. */
+Tcl_UniCharIsUnicode(
+ int ch) /* Code point to test. Negative values are rejected by the unsigned range test. */
+{ /* The range test must stay first: && short-circuits, so ch - 0xFDD0 is only evaluated for 0 <= ch <= 0x10FFFF. */
+ return ((unsigned int)ch <= 0x10FFFF) && ((ch & 0xFFF800) != 0xD800) /* in range; not a surrogate (0xD800-0xDFFF) */
+ && ((ch & 0xFFFE) != 0xFFFE) && ((unsigned int)(ch - 0xFDD0) >= 32); /* low 16 bits not FFFE/FFFF; not in 0xFDD0-0xFDEF (32 values) */
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsWordChar --
*
* Test if a character is alphanumeric or a connector punctuation mark.