diff options
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 58 |
1 files changed, 23 insertions, 35 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 40f6272..ff1f120 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -26,28 +26,27 @@ #define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \ | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1<<OTHER_LETTER)) +#define CONTROL_BITS ((1 << CONTROL) | (1 << FORMAT) | (1 << PRIVATE_USE)) + #define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER) #define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \ | (1 << PARAGRAPH_SEPARATOR)) -#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION) - -#define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | \ - (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \ - (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \ - (1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \ - (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ - (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ - (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \ - (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \ - (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL)) +#define WORD_BITS (ALPHA_BITS | DIGIT_BITS | (1 << CONNECTOR_PUNCTUATION)) #define PUNCT_BITS ((1 << CONNECTOR_PUNCTUATION) | \ (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION)) +#define GRAPH_BITS (WORD_BITS | PUNCT_BITS | \ + (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \ + (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \ + (1 << OTHER_NUMBER) | \ + (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \ + (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL)) + /* * Unicode characters less than this value are represented by themselves in * UTF-8 strings. @@ -1328,9 +1327,7 @@ int Tcl_UniCharIsAlnum( int ch) /* Unicode character to test. */ { - register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - - return (((ALPHA_BITS | DIGIT_BITS) >> category) & 1); + return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1); } /* @@ -1353,8 +1350,7 @@ int Tcl_UniCharIsAlpha( int ch) /* Unicode character to test. */ { - register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - return ((ALPHA_BITS >> category) & 1); + return ((ALPHA_BITS >> GetCategory(ch)) & 1); } /* @@ -1377,7 +1373,7 @@ int Tcl_UniCharIsControl( int ch) /* Unicode character to test. */ { - return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL); + return ((CONTROL_BITS >> GetCategory(ch)) & 1); } /* @@ -1400,7 +1396,7 @@ int Tcl_UniCharIsDigit( int ch) /* Unicode character to test. */ { - return (GetUniCharInfo(ch)&UNICODE_CATEGORY_MASK) == DECIMAL_DIGIT_NUMBER; + return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER); } /* @@ -1423,8 +1419,7 @@ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { - register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - return (((PRINT_BITS >> category) & 1) && (ch != ' ')); + return ((GRAPH_BITS >> GetCategory(ch)) & 1); } /* @@ -1447,7 +1442,7 @@ int Tcl_UniCharIsLower( int ch) /* Unicode character to test. */ { - return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == LOWERCASE_LETTER); + return (GetCategory(ch) == LOWERCASE_LETTER); } /* @@ -1470,8 +1465,7 @@ int Tcl_UniCharIsPrint( int ch) /* Unicode character to test. */ { - register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - return ((PRINT_BITS >> category) & 1); + return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1); } /* @@ -1494,8 +1488,7 @@ int Tcl_UniCharIsPunct( int ch) /* Unicode character to test. */ { - register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - return ((PUNCT_BITS >> category) & 1); + return ((PUNCT_BITS >> GetCategory(ch)) & 1); } /* @@ -1518,18 +1511,15 @@ int Tcl_UniCharIsSpace( int ch) /* Unicode character to test. */ { - register int category; - /* * If the character is within the first 127 characters, just use the * standard C function, otherwise consult the Unicode table. */ - if (ch < 0x80) { - return TclIsSpaceProc((char)ch); + if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) { + return isspace(UCHAR(ch)); /* INTL: ISO space */ } else { - category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - return ((SPACE_BITS >> category) & 1); + return ((SPACE_BITS >> GetCategory(ch)) & 1); } } @@ -1553,7 +1543,7 @@ int Tcl_UniCharIsUpper( int ch) /* Unicode character to test. */ { - return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == UPPERCASE_LETTER); + return (GetCategory(ch) == UPPERCASE_LETTER); } /* @@ -1576,9 +1566,7 @@ int Tcl_UniCharIsWordChar( int ch) /* Unicode character to test. */ { - register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - - return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1); + return ((WORD_BITS >> GetCategory(ch)) & 1); } /* |