summaryrefslogtreecommitdiffstats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
authorstanton <stanton>1999-05-22 01:20:10 (GMT)
committerstanton <stanton>1999-05-22 01:20:10 (GMT)
commitac39508cf97576cd9747c5630c4a13d794663b4a (patch)
tree4b7c61e6c670f227cf4d603907157fb6246d2d50 /generic/tclUtf.c
parent21bd132482f68735f5a4381934f56ee911904e87 (diff)
downloadtcl-ac39508cf97576cd9747c5630c4a13d794663b4a.zip
tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.tar.gz
tcl-ac39508cf97576cd9747c5630c4a13d794663b4a.tar.bz2
Merged changes from scriptics-tclpro-1-3-b2 branch
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r--generic/tclUtf.c108
1 files changed, 107 insertions, 1 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 2361a2e..635ffbe 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtf.c,v 1.6 1999/05/20 23:40:34 hershey Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.7 1999/05/22 01:20:13 stanton Exp $
*/
#include "tclInt.h"
@@ -35,6 +35,16 @@
#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION)
+/* Category bits for which Tcl_UniCharIsPrint returns non-zero. */
+#define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | CONNECTOR_BITS | \
+ (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \
+ (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \
+ (1 << OTHER_NUMBER) | (1 << DASH_PUNCTUATION) | \
+ (1 << OPEN_PUNCTUATION) | (1 << CLOSE_PUNCTUATION) | \
+ (1 << INITIAL_QUOTE_PUNCTUATION) | (1 << FINAL_QUOTE_PUNCTUATION) | \
+ (1 << OTHER_PUNCTUATION) | (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \
+ (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL))
+
/*
* Unicode characters less than this value are represented by themselves
* in UTF-8 strings.
@@ -1341,6 +1351,29 @@ Tcl_UniCharIsAlpha(ch)
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsControl --
+ *
+ * Report whether a character's Unicode category is CONTROL.
+ *
+ * Results:
+ * Returns 1 if the category of ch is CONTROL, otherwise 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsControl(ch)
+ int ch; /* Character whose category is examined. */
+{
+ return (CONTROL == (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK));
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsDigit --
*
* Test if a character is a numeric Unicode character.
@@ -1365,6 +1398,30 @@ Tcl_UniCharIsDigit(ch)
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsGraph --
+ *
+ * Test if a character is a Unicode print character other than U+0020.
+ *
+ * Results:
+ * Returns 1 if ch is printable and its full code point is not U+0020.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsGraph(ch)
+ int ch; /* Unicode character to test; not truncated to a byte. */
+{
+ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+ return (((PRINT_BITS >> category) & 1) && (ch != ' '));
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsLower --
*
* Test if a character is a lowercase Unicode character.
@@ -1388,6 +1445,55 @@ Tcl_UniCharIsLower(ch)
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsPrint --
+ *
+ * Report whether a character's Unicode category is listed in PRINT_BITS.
+ *
+ * Results:
+ * Returns 1 if the category bit is set in PRINT_BITS, otherwise 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsPrint(ch)
+ int ch; /* Character whose category is examined. */
+{
+ register int bitIndex = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+ return (1 & (PRINT_BITS >> bitIndex));
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharIsPunct --
+ *
+ * Test if a character is printable but neither U+0020 nor an alnum.
+ *
+ * Results:
+ * Returns 1 if ch is such a character (full code point tested), else 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsPunct(ch)
+ int ch; /* Unicode character to test; not truncated to a byte. */
+{
+ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);
+ return (((PRINT_BITS >> category) & 1) && (ch != ' ')
+ && !(((ALPHA_BITS | DIGIT_BITS) >> category) & 1));
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsSpace --
*
* Test if a character is a whitespace Unicode character.