diff options
Diffstat (limited to 'generic/tclUtf.c')
| -rw-r--r-- | generic/tclUtf.c | 58 | 
1 files changed, 23 insertions, 35 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 40f6272..ff1f120 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -26,28 +26,27 @@  #define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \  	| (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1<<OTHER_LETTER)) +#define CONTROL_BITS ((1 << CONTROL) | (1 << FORMAT) | (1 << PRIVATE_USE)) +  #define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER)  #define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \  	| (1 << PARAGRAPH_SEPARATOR)) -#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION) - -#define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | \ -	(1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \ -	(1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \ -	(1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \ -	(1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ -	(1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ -	(1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \ -	(1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \ -	(1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL)) +#define WORD_BITS (ALPHA_BITS | DIGIT_BITS | (1 << CONNECTOR_PUNCTUATION))  #define PUNCT_BITS ((1 << CONNECTOR_PUNCTUATION) | \  	(1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \  	(1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \  	(1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION)) +#define GRAPH_BITS (WORD_BITS | PUNCT_BITS | \ +	(1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \ +	(1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \ +	(1 << OTHER_NUMBER) | \ +	(1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \ +	(1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL)) +  /*   * Unicode characters less than this value are represented by themselves in   * UTF-8 strings. @@ -1328,9 +1327,7 @@ int  Tcl_UniCharIsAlnum(      int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - -    return (((ALPHA_BITS | DIGIT_BITS) >> category) & 1); +    return (((ALPHA_BITS | DIGIT_BITS) >> GetCategory(ch)) & 1);  }  /* @@ -1353,8 +1350,7 @@ int  Tcl_UniCharIsAlpha(      int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); -    return ((ALPHA_BITS >> category) & 1); +    return ((ALPHA_BITS >> GetCategory(ch)) & 1);  }  /* @@ -1377,7 +1373,7 @@ int  Tcl_UniCharIsControl(      int ch)			/* Unicode character to test. */  { -    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL); +    return ((CONTROL_BITS >> GetCategory(ch)) & 1);  }  /* @@ -1400,7 +1396,7 @@ int  Tcl_UniCharIsDigit(      int ch)			/* Unicode character to test. */  { -    return (GetUniCharInfo(ch)&UNICODE_CATEGORY_MASK) == DECIMAL_DIGIT_NUMBER; +    return (GetCategory(ch) == DECIMAL_DIGIT_NUMBER);  }  /* @@ -1423,8 +1419,7 @@ int  Tcl_UniCharIsGraph(      int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); -    return (((PRINT_BITS >> category) & 1) && (ch != ' ')); +    return ((GRAPH_BITS >> GetCategory(ch)) & 1);  }  /* @@ -1447,7 +1442,7 @@ int  Tcl_UniCharIsLower(      int ch)			/* Unicode character to test. */  { -    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == LOWERCASE_LETTER); +    return (GetCategory(ch) == LOWERCASE_LETTER);  }  /* @@ -1470,8 +1465,7 @@ int  Tcl_UniCharIsPrint(      int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); -    return ((PRINT_BITS >> category) & 1); +    return (((GRAPH_BITS|SPACE_BITS) >> GetCategory(ch)) & 1);  }  /* @@ -1494,8 +1488,7 @@ int  Tcl_UniCharIsPunct(      int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); -    return ((PUNCT_BITS >> category) & 1); +    return ((PUNCT_BITS >> GetCategory(ch)) & 1);  }  /* @@ -1518,18 +1511,15 @@ int  Tcl_UniCharIsSpace(      int ch)			/* Unicode character to test. */  { -    register int category; -      /*       * If the character is within the first 127 characters, just use the       * standard C function, otherwise consult the Unicode table.       */ -    if (ch < 0x80) { -	return TclIsSpaceProc((char)ch); +    if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) { +	return isspace(UCHAR(ch)); /* INTL: ISO space */      } else { -	category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); -	return ((SPACE_BITS >> category) & 1); +	return ((SPACE_BITS >> GetCategory(ch)) & 1);      }  } @@ -1553,7 +1543,7 @@ int  Tcl_UniCharIsUpper(      int ch)			/* Unicode character to test. */  { -    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == UPPERCASE_LETTER); +    return (GetCategory(ch) == UPPERCASE_LETTER);  }  /* @@ -1576,9 +1566,7 @@ int  Tcl_UniCharIsWordChar(      int ch)			/* Unicode character to test. */  { -    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); - -    return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1); +    return ((WORD_BITS >> GetCategory(ch)) & 1);  }  /*  | 
