From d6641aab890671957021803671e40455649ea3dd Mon Sep 17 00:00:00 2001 From: stanton Date: Fri, 16 Oct 1998 01:16:57 +0000 Subject: Added Unicode character table support: added TclUniCharIsWordChar tclCmdMZ.c (Tcl_StringObjCmd): added "totitle" subcommand, changed "wordend" and "wordstart" to properly handle Unicode word characters and connector punctuation --- generic/tclCmdMZ.c | 59 ++--- generic/tclInt.h | 3 +- generic/tclUtf.c | 180 ++++++++------- generic/tclUtf.h | 625 +++++++++++++++++++++++++++++++++++++++++++++++++++++ tests/cmdMZ.test | 159 +++++++------- tests/utf.test | 114 ++++++++-- tools/uniParse.tcl | 364 +++++++++++++++++++++++++++++++ 7 files changed, 1287 insertions(+), 217 deletions(-) create mode 100644 generic/tclUtf.h create mode 100644 tools/uniParse.tcl diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 12bb1c6..8a3b6d5 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.2 1998/09/24 23:58:43 stanton Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.3 1998/10/16 01:16:57 stanton Exp $ */ #include "tclInt.h" @@ -1118,14 +1118,14 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) static char *options[] = { "compare", "first", "index", "last", "length", "match", "range", "tolower", - "toupper", "trim", "trimleft", "trimright", - "wordend", "wordstart", (char *) NULL + "toupper", "totitle", "trim", "trimleft", + "trimright", "wordend", "wordstart", (char *) NULL }; enum options { STR_COMPARE, STR_FIRST, STR_INDEX, STR_LAST, STR_LENGTH, STR_MATCH, STR_RANGE, STR_TOLOWER, - STR_TOUPPER, STR_TRIM, STR_TRIMLEFT, STR_TRIMRIGHT, - STR_WORDEND, STR_WORDSTART + STR_TOUPPER, STR_TOTITLE, STR_TRIM, STR_TRIMLEFT, + STR_TRIMRIGHT, STR_WORDEND, STR_WORDSTART }; if (objc < 2) { @@ -1329,7 +1329,9 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } break; } - case STR_TOLOWER: { + case STR_TOLOWER: + case STR_TOUPPER: + case STR_TOTITLE: if (objc != 3) { Tcl_WrongNumArgs(interp, 2, objv, "string"); return TCL_ERROR; @@ -1341,36 +1343,20 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) * Since the result object is not a shared object, it is * safe to copy the string into the result and do the * conversion in place. The conversion may change the length - * of the string, so reset the length after converstion. - * the starting size, so reset the length after conversion. + * of the string, so reset the length after conversion. */ Tcl_SetStringObj(resultPtr, string1, length1); - length1 = Tcl_UtfToLower(Tcl_GetStringFromObj(resultPtr, NULL)); - Tcl_SetObjLength(resultPtr, length1); - break; - } - case STR_TOUPPER: { - if (objc != 3) { - Tcl_WrongNumArgs(interp, 2, objv, "string"); - return TCL_ERROR; + if ((enum options) index == STR_TOLOWER) { + length1 = Tcl_UtfToLower(Tcl_GetStringFromObj(resultPtr, NULL)); + } else if ((enum options) index == STR_TOUPPER) { + length1 = Tcl_UtfToUpper(Tcl_GetStringFromObj(resultPtr, NULL)); + } else { + length1 = Tcl_UtfToTitle(Tcl_GetStringFromObj(resultPtr, NULL)); } - - string1 = Tcl_GetStringFromObj(objv[2], &length1); - - /* - * Since the result object is not a shared object, it is - * safe to copy the string into the result and do the - * conversion in place. The conversion may change the length - * of the string, so reset the length after converstion. - * the starting size, so reset the length after conversion. - */ - - Tcl_SetStringObj(resultPtr, string1, length1); - length1 = Tcl_UtfToUpper(Tcl_GetStringFromObj(resultPtr, NULL)); Tcl_SetObjLength(resultPtr, length1); break; - } + case STR_TRIM: { Tcl_UniChar ch, trim; register char *p, *end; @@ -1460,7 +1446,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) goto dotrim; } case STR_WORDEND: { - int cur, c; + int cur; Tcl_UniChar ch; char *p, *end; int numChars; @@ -1483,11 +1469,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) end = string1+length1; for (cur = index; p < end; cur++) { p += Tcl_UtfToUniChar(p, &ch); - if (ch > 0xff) { - break; - } - c = UCHAR(ch); - if (!isalnum(c) && (c != '_')) { /* INTL: ISO only */ + if (!TclUniCharIsWordChar(ch)) { break; } } @@ -1501,7 +1483,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) break; } case STR_WORDSTART: { - int cur, c; + int cur; Tcl_UniChar ch; char *p; int numChars; @@ -1524,8 +1506,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) p = Tcl_UtfAtIndex(string1, index); for (cur = index; cur >= 0; cur--) { Tcl_UtfToUniChar(p, &ch); - c = UCHAR(ch); - if (!isalnum(c) && (c != '_')) { /* INTL: ISO only */ + if (!TclUniCharIsWordChar(ch)) { break; } p = Tcl_UtfPrev(p, string1); diff --git a/generic/tclInt.h b/generic/tclInt.h index 6df066c..140a2eb 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclInt.h,v 1.1.2.5 1998/10/06 20:39:31 rjohnson Exp $ + * RCS: @(#) $Id: tclInt.h,v 1.1.2.6 1998/10/16 01:16:57 stanton Exp $ */ #ifndef _TCLINT @@ -2106,6 +2106,7 @@ EXTERN int TclUniCharIsDigit _ANSI_ARGS_((int ch)); EXTERN int TclUniCharIsLower _ANSI_ARGS_((int ch)); EXTERN int TclUniCharIsSpace _ANSI_ARGS_((int ch)); EXTERN int TclUniCharIsUpper _ANSI_ARGS_((int ch)); +EXTERN int TclUniCharIsWordChar _ANSI_ARGS_((int ch)); EXTERN int TclUniCharLen _ANSI_ARGS_((Tcl_UniChar *str)); EXTERN int TclUniCharNcmp _ANSI_ARGS_((const Tcl_UniChar *cs, const Tcl_UniChar *ct, size_t n)); diff --git a/generic/tclUtf.c b/generic/tclUtf.c index ee9f59a..9027921 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -8,10 +8,27 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.1.2.2 1998/10/03 01:56:42 stanton Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.1.2.3 1998/10/16 01:16:57 stanton Exp $ */ #include "tclInt.h" +#include "TclUtf.h" + +/* + * The following macros are used for fast character category tests. The + * x_BITS values are shifted right by the category value to determine whether + * the given category is included in the set. + */ + +#define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \ + | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER)) + +#define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER) + +#define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \ + | (1 << PARAGRAPH_SEPARATOR)) + +#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION) /* * Unicode characters less than this value are represented by themselves @@ -813,10 +830,6 @@ Tcl_UtfBackslash(src, readPtr, dst) * Convert lowercase characters to uppercase characters in a UTF * string in place. The conversion may shrink the UTF string. * - * INTL: This implementation only handles iso8859-1 characters - * in the current locale. This should be changed to use the - * Unicode character tables. - * * Results: * Returns the number of bytes in the resulting string * excluding the trailing null. @@ -841,16 +854,7 @@ Tcl_UtfToUpper(str) src = dst = str; while (*src) { src += Tcl_UtfToUniChar(src, &ch); - - /* - * INTL: This conversion should be replaced with a table lookup for the - * full Unicode translation. - */ - - if ((ch < 0x100) && islower(ch)) { /* INTL: ISO only */ - ch = (Tcl_UniChar) UCHAR(toupper(ch)); /* INTL: ISO only */ - } - dst += Tcl_UniCharToUtf(ch, dst); + dst += Tcl_UniCharToUtf(Tcl_UniCharToUpper(ch), dst); } *dst = '\0'; return (dst - str); @@ -864,10 +868,6 @@ Tcl_UtfToUpper(str) * Convert uppercase characters to lowercase characters in a UTF * string in place. The conversion may shrink the UTF string. * - * INTL: This implementation only handles iso8859-1 characters - * in the current locale. This should be changed to use the - * Unicode character tables. - * * Results: * Returns the number of bytes in the resulting string * excluding the trailing null. @@ -892,16 +892,7 @@ Tcl_UtfToLower(str) src = dst = str; while (*src) { src += Tcl_UtfToUniChar(src, &ch); - - /* - * INTL: This conversion should be replaced with a table lookup for the - * full Unicode translation. - */ - - if ((ch < 0x100) && isupper(ch)) { /* INTL: ISO only */ - ch = (Tcl_UniChar) UCHAR(tolower(ch)); /* INTL: ISO only */ - } - dst += Tcl_UniCharToUtf(ch, dst); + dst += Tcl_UniCharToUtf(Tcl_UniCharToLower(ch), dst); } *dst = '\0'; return (dst - str); @@ -916,10 +907,6 @@ Tcl_UtfToLower(str) * uppercase and the rest of the string to lowercase. The * conversion happens in place and may shrink the UTF string. * - * INTL: This implementation only handles iso8859-1 characters - * in the current locale. This should be changed to use the - * Unicode character tables. - * * Results: * Returns the number of bytes in the resulting string * excluding the trailing null. @@ -946,17 +933,11 @@ Tcl_UtfToTitle(str) if (*src) { src += Tcl_UtfToUniChar(src, &ch); - if ((ch < 0x100) && islower(ch)) { /* INTL: ISO only */ - ch = (Tcl_UniChar) UCHAR(toupper(ch)); /* INTL: ISO only */ - } - dst += Tcl_UniCharToUtf(ch, dst); + dst += Tcl_UniCharToUtf(Tcl_UniCharToTitle(ch), dst); } while (*src) { src += Tcl_UtfToUniChar(src, &ch); - if ((ch < 0x100) && isupper(ch)) { /* INTL: ISO only */ - ch = (Tcl_UniChar) UCHAR(tolower(ch)); /* INTL: ISO only */ - } - dst += Tcl_UniCharToUtf(ch, dst); + dst += Tcl_UniCharToUtf(Tcl_UniCharToLower(ch), dst); } *dst = '\0'; return (dst - str); @@ -969,8 +950,6 @@ Tcl_UtfToTitle(str) * * Compute the uppercase equivalent of the given Unicode character. * - * INTL: this implementation only works on ISO characters. - * * Results: * Returns the uppercase Unicode character. * @@ -984,9 +963,13 @@ Tcl_UniChar Tcl_UniCharToUpper(ch) int ch; /* Unicode character to convert. */ { - return (Tcl_UniChar) ((ch < 0x100) - ? UCHAR(toupper(ch)) /* INTL: ISO only */ - : ch); + int info = GetUniCharInfo(ch); + + if (GetCaseType(info) & 0x04) { + return (Tcl_UniChar) (ch - GetDelta(info)); + } else { + return ch; + } } /* @@ -996,8 +979,6 @@ Tcl_UniCharToUpper(ch) * * Compute the lowercase equivalent of the given Unicode character. * - * INTL: this implementation only works on ISO characters. - * * Results: * Returns the lowercase Unicode character. * @@ -1011,9 +992,13 @@ Tcl_UniChar Tcl_UniCharToLower(ch) int ch; /* Unicode character to convert. */ { - return (Tcl_UniChar) ((ch < 0x100) - ? UCHAR(tolower(ch)) /* INTL: ISO only */ - : ch); + int info = GetUniCharInfo(ch); + + if (GetCaseType(info) & 0x02) { + return (Tcl_UniChar) (ch + GetDelta(info)); + } else { + return ch; + } } /* @@ -1023,8 +1008,6 @@ Tcl_UniCharToLower(ch) * * Compute the titlecase equivalent of the given Unicode character. * - * INTL: this implementation only works on ISO characters. - * * Results: * Returns the titlecase Unicode character. * @@ -1038,9 +1021,20 @@ Tcl_UniChar Tcl_UniCharToTitle(ch) int ch; /* Unicode character to convert. */ { - return (Tcl_UniChar) ((ch < 0x100) - ? UCHAR(toupper(ch)) /* INTL: ISO only */ - : ch); + int info = GetUniCharInfo(ch); + int mode = GetCaseType(info); + + if (mode & 0x1) { + /* + * Subtract or add one depending on the original case. + */ + + return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1)); + } else if (mode == 0x4) { + return (Tcl_UniChar) (ch - GetDelta(info)); + } else { + return ch; + } } /* @@ -1114,8 +1108,6 @@ TclUniCharNcmp(cs, ct, n) * * Test if a character is an alphanumeric Unicode character. * - * INTL: this implementation only works on ISO characters. - * * Results: * Returns 1 if character is alphanumeric. * @@ -1129,7 +1121,9 @@ int TclUniCharIsAlnum(ch) int ch; /* Unicode character to test. */ { - return ((ch < 0x100) ? isalnum(ch) : 0); /* INTL: ISO only */ + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); + + return (((ALPHA_BITS | DIGIT_BITS) >> category) & 1); } /* @@ -1139,8 +1133,6 @@ TclUniCharIsAlnum(ch) * * Test if a character is an alphabetic Unicode character. * - * INTL: this implementation only works on ISO characters. - * * Results: * Returns 1 if character is alphabetic. * @@ -1154,7 +1146,8 @@ int TclUniCharIsAlpha(ch) int ch; /* Unicode character to test. */ { - return ((ch < 0x100) ? isalpha(ch) : 0); /* INTL: ISO only */ + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); + return ((ALPHA_BITS >> category) & 1); } /* @@ -1164,10 +1157,8 @@ TclUniCharIsAlpha(ch) * * Test if a character is a numeric Unicode character. * - * INTL: this implementation only works on ISO characters. - * * Results: - * Returns 1 if character is a digit. + * Returns non-zero if character is a digit. * * Side effects: * None. @@ -1179,7 +1170,8 @@ int TclUniCharIsDigit(ch) int ch; /* Unicode character to test. */ { - return ((ch < 0x100) ? isdigit(ch) : 0); /* INTL: ISO only */ + return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) + == DECIMAL_DIGIT_NUMBER); } /* @@ -1189,10 +1181,8 @@ TclUniCharIsDigit(ch) * * Test if a character is a lowercase Unicode character. * - * INTL: this implementation only works on ISO characters. - * * Results: - * Returns 1 if character is lowercase. + * Returns non-zero if character is lowercase. * * Side effects: * None. @@ -1204,7 +1194,7 @@ int TclUniCharIsLower(ch) int ch; /* Unicode character to test. */ { - return ((ch < 0x100) ? islower(ch) : 0); /* INTL: ISO only */ + return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == LOWERCASE_LETTER); } /* @@ -1214,10 +1204,8 @@ TclUniCharIsLower(ch) * * Test if a character is a whitespace Unicode character. * - * INTL: this implementation only works on ISO characters. - * * Results: - * Returns 1 if character is a space. + * Returns non-zero if character is a space. * * Side effects: * None. @@ -1229,7 +1217,19 @@ int TclUniCharIsSpace(ch) int ch; /* Unicode character to test. */ { - return ((ch < 0x100) ? isspace(ch) : 0); /* INTL: ISO only */ + register int category; + + /* + * If the character is within the first 127 characters, just use the + * standard C function, otherwise consult the Unicode table. + */ + + if (ch < 0x80) { + return isspace(UCHAR(ch)); /* INTL: ISO space */ + } else { + category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); + return ((SPACE_BITS >> category) & 1); + } } /* @@ -1239,10 +1239,8 @@ TclUniCharIsSpace(ch) * * Test if a character is a uppercase Unicode character. * - * INTL: this implementation only works on ISO characters. - * * Results: - * Returns 1 if character is uppercase. + * Returns non-zero if character is uppercase. * * Side effects: * None. @@ -1254,5 +1252,31 @@ int TclUniCharIsUpper(ch) int ch; /* Unicode character to test. */ { - return ((ch < 0x100) ? isupper(ch) : 0); /* INTL: ISO only */ + return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == UPPERCASE_LETTER); +} + +/* + *---------------------------------------------------------------------- + * + * TclUniCharIsWordChar -- + * + * Test if a character is alphanumeric or a connector punctuation + * mark. + * + * Results: + * Returns 1 if character is a word character. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclUniCharIsWordChar(ch) + int ch; /* Unicode character to test. */ +{ + register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); + + return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1); } diff --git a/generic/tclUtf.h b/generic/tclUtf.h new file mode 100644 index 0000000..481a0ae --- /dev/null +++ b/generic/tclUtf.h @@ -0,0 +1,625 @@ +/* + * tclUtf.h -- + * + * Declarations of Unicode character information tables. This file is + * automatically generated by the tools/uniParse.tcl script. Do not + * modify this file by hand. + * + * Copyright (c) 1998 by Scriptics Corporation. + * All rights reserved. + * + * RCS: @(#) $Id: tclUtf.h,v 1.1.2.1 1998/10/16 01:16:58 stanton Exp $ + */ + +#ifndef _TCLUTF +#define _TCLUTF + +/* + * A 16-bit Unicode character is split into two parts in order to index + * into the following tables. The lower OFFSET_BITS comprise an offset + * into a page of characters. The upper bits comprise the page number. + */ + +#define OFFSET_BITS 6 + +/* + * The pageMap is indexed by page number and returns an alternate page number + * that identifies a unique page of characters. Many Unicode characters map + * to the same alternate page number. + */ + +static char pageMap[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, + 28, 28, 47, 48, 49, 50, 51, 52, 53, 28, 28, 28, 54, 55, 56, 57, 58, + 59, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 60, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 75, + 76, 77, 78, 28, 28, 79, 80, 81, 82, 83, 83, 84, 85, 86, 85, 28, 28, + 87, 88, 89, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 90, 91, 92, 93, 94, 56, 95, 28, 96, 97, 98, 99, 83, 100, 83, + 101, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 102, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 103, 28, 104, 104, 104, 104, 104, + 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, + 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 56, 56, 56, 56, 106, 28, 28, 28, 107, 108, 109, 110, 56, 56, 56, + 56, 111, 112, 113, 114, 115, 116, 56, 117, 118, 119, 120, 121 +}; + +/* + * The groupMap is indexed by combining the alternate page number with + * the page offset and returns a group number that identifies a unique + * set of character attributes. + */ + +static char groupMap[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 4, 3, 3, 3, 5, 6, 3, 7, 3, 8, + 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 3, 7, 7, 7, 3, 3, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 5, 3, 6, 11, 12, 11, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 5, 7, 6, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 4, 4, + 4, 14, 14, 11, 14, 15, 16, 7, 8, 14, 11, 14, 7, 17, 17, 11, 15, 14, + 3, 11, 17, 15, 18, 17, 17, 17, 3, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 10, 10, + 10, 10, 10, 10, 10, 15, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 13, 13, 13, 13, + 13, 13, 13, 19, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 22, 23, 20, 21, 20, 21, 20, 21, 15, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 15, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 24, 20, 21, 20, 21, 20, 21, 25, 15, 26, 20, 21, + 20, 21, 27, 20, 21, 28, 28, 20, 21, 15, 29, 30, 31, 20, 21, 28, 32, + 15, 33, 34, 20, 21, 15, 15, 33, 35, 15, 36, 20, 21, 20, 21, 20, 21, + 37, 20, 21, 38, 39, 15, 20, 21, 38, 20, 21, 40, 40, 20, 21, 20, 21, + 41, 20, 21, 15, 39, 20, 21, 39, 39, 39, 39, 39, 39, 42, 43, 44, 42, + 43, 44, 42, 43, 44, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 45, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 15, 42, 43, 44, 20, 21, 0, 0, 0, 0, 20, 21, + 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 15, 15, 15, 46, 47, 15, 48, 48, 15, 49, 15, 50, 15, 15, 15, 15, + 48, 15, 15, 51, 15, 15, 15, 15, 52, 53, 15, 15, 15, 15, 15, 53, 15, + 15, 54, 15, 15, 55, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 56, 15, 15, 15, 15, 56, 15, 57, 57, 15, 15, 15, 15, 15, 15, 58, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 11, 11, 59, 59, 59, 59, 59, 59, 59, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 59, 59, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 0, 59, 59, 59, 59, 59, 11, 11, 11, 11, 11, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, + 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, + 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 60, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 3, 3, 0, 0, 0, 0, 59, 0, 0, 0, 3, 0, 0, 0, 0, 0, 11, 11, 61, + 3, 62, 62, 62, 0, 63, 0, 64, 64, 15, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 65, 66, 66, 66, 15, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 67, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 68, 69, 69, 0, 70, 71, 37, 37, 37, 72, 73, 0, 0, 0, 37, 0, 37, 0, 37, + 0, 37, 0, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 74, + 75, 45, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 76, 76, 76, + 76, 76, 76, 76, 76, 76, 76, 76, 0, 76, 76, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 0, 75, 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 0, 75, 75, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 14, 60, 60, 60, 60, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 39, 20, + 21, 20, 21, 0, 0, 20, 21, 0, 0, 20, 21, 0, 0, 0, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 0, 0, 20, 21, 20, 21, 20, 21, 20, 21, 0, 0, + 20, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 0, 0, 59, 3, 3, + 3, 3, 3, 3, 0, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 15, 0, 3, 0, 0, 0, 0, 0, 0, 0, 60, + 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 0, + 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 0, 60, 60, 60, 3, 60, 3, 60, 60, 3, 60, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 0, 0, 0, 0, 0, 39, 39, 39, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 3, 0, 0, 0, 3, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, + 0, 0, 0, 0, 59, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 60, 60, 60, + 60, 60, 60, 60, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 3, 3, 3, 3, 0, 0, 60, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 0, 39, 39, + 39, 39, 39, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 0, 39, 39, 39, 39, 3, 39, 60, 60, 60, 60, 60, 60, 60, 79, 79, + 60, 60, 60, 60, 60, 60, 59, 59, 60, 60, 14, 60, 60, 60, 60, 0, 0, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 60, 80, 0, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 0, 60, 39, + 80, 80, 80, 60, 60, 60, 60, 60, 60, 60, 60, 80, 80, 80, 80, 60, 0, + 0, 14, 60, 60, 60, 60, 0, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 60, 60, 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 80, 80, 0, 39, 39, 39, 39, 39, 39, + 39, 39, 0, 0, 39, 39, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 39, 39, 39, 39, + 39, 39, 39, 0, 39, 0, 0, 0, 39, 39, 39, 39, 0, 0, 60, 0, 80, 80, 80, + 60, 60, 60, 60, 0, 0, 80, 80, 0, 0, 80, 80, 60, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 80, 0, 0, 0, 0, 39, 39, 0, 39, 39, 39, 60, 60, 0, 0, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 39, 39, 4, 4, 17, 17, 17, 17, 17, 17, 14, 0, 0, + 0, 0, 0, 0, 0, 60, 0, 0, 39, 39, 39, 39, 39, 39, 0, 0, 0, 0, 39, 39, + 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 0, 39, 39, 39, 39, 39, 39, 39, 0, 39, 39, 0, + 39, 39, 0, 39, 39, 0, 0, 60, 0, 80, 80, 80, 60, 60, 0, 0, 0, 0, 60, + 60, 0, 0, 60, 60, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 39, 39, + 39, 0, 39, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 60, 60, + 39, 39, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 60, 80, 0, 39, + 39, 39, 39, 39, 39, 39, 0, 39, 0, 39, 39, 39, 0, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 0, 39, 39, 39, 39, 39, 39, 39, 0, 39, 39, 0, 39, 39, 39, 39, 39, 0, + 0, 60, 39, 80, 80, 80, 60, 60, 60, 60, 60, 0, 60, 60, 80, 0, 80, 80, + 60, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, + 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 60, 80, 80, 0, 39, 39, 39, 39, 39, 39, 39, 39, + 0, 0, 39, 39, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 39, 39, 39, 39, 39, 39, + 39, 0, 39, 39, 0, 0, 39, 39, 39, 39, 0, 0, 60, 39, 80, 60, 80, 60, + 60, 60, 0, 0, 0, 80, 80, 0, 0, 80, 80, 60, 0, 0, 0, 0, 0, 0, 0, 0, + 60, 80, 0, 0, 0, 0, 39, 39, 0, 39, 39, 39, 0, 0, 0, 0, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 60, 80, 0, 39, 39, 39, 39, 39, 39, 0, 0, 0, 39, 39, 39, 0, 39, + 39, 39, 39, 0, 0, 0, 39, 39, 0, 39, 0, 39, 39, 0, 0, 0, 39, 39, 0, + 0, 0, 39, 39, 39, 0, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 0, 39, 39, + 39, 0, 0, 0, 0, 80, 80, 60, 80, 80, 0, 0, 0, 80, 80, 80, 0, 80, 80, + 80, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 80, 80, 0, 39, 39, 39, 39, 39, 39, 39, + 39, 0, 39, 39, 39, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 0, 39, 39, 39, 39, 39, 0, 0, 0, 0, 60, 60, 60, + 80, 80, 80, 80, 0, 60, 60, 60, 0, 60, 60, 60, 60, 0, 0, 0, 0, 0, 0, + 0, 60, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 39, 0, 0, 0, 0, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 80, 80, 0, 39, 39, 39, 39, 39, 39, 39, 39, 0, 39, 39, 39, 0, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 0, 39, 39, 39, 39, 39, 0, 0, 0, 0, 80, 60, 80, 80, 80, 80, 80, 0, 60, + 80, 80, 0, 80, 80, 60, 60, 0, 0, 0, 0, 0, 0, 0, 80, 80, 0, 0, 0, 0, + 0, 0, 0, 39, 0, 39, 39, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 80, 0, 39, 39, + 39, 39, 39, 39, 39, 39, 0, 39, 39, 39, 0, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, + 0, 0, 0, 80, 80, 80, 60, 60, 60, 0, 0, 80, 80, 80, 0, 80, 80, 80, 60, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 0, 39, 39, 0, 0, + 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 3, + 39, 60, 39, 39, 60, 60, 60, 60, 60, 60, 60, 0, 0, 0, 0, 4, 39, 39, + 39, 39, 39, 39, 59, 60, 60, 60, 60, 60, 60, 60, 60, 14, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, + 39, 0, 39, 0, 0, 39, 39, 0, 39, 0, 0, 39, 0, 0, 0, 0, 0, 0, 39, 39, + 39, 39, 0, 39, 39, 39, 39, 39, 39, 39, 0, 39, 39, 39, 0, 39, 0, 39, + 0, 0, 39, 39, 0, 39, 39, 3, 39, 60, 39, 39, 60, 60, 60, 60, 60, 60, + 0, 60, 60, 39, 0, 0, 39, 39, 39, 39, 39, 0, 59, 0, 60, 60, 60, 60, + 60, 60, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 39, 39, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 14, 14, 14, 14, 14, 60, 60, 14, 14, 14, 14, 14, 14, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 14, 60, 14, 60, 14, 60, 5, 6, 5, 6, 5, 6, 39, 39, 39, 39, 39, 39, 39, + 39, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 0, 0, 0, 0, 0, 0, 0, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 80, 60, 60, 60, 60, 60, 3, 60, 60, 14, 14, 14, 14, 0, 0, + 0, 0, 60, 60, 60, 60, 60, 60, 0, 60, 0, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 0, 0, 0, 60, + 60, 60, 60, 60, 60, 60, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 0, 0, 0, 0, 3, 0, 0, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 0, 0, 0, 0, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 0, 0, 0, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 0, 0, 0, 0, 0, 0, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 15, 15, 15, 15, 15, 81, 0, 0, 0, 0, 20, 21, + 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, + 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, 21, 20, + 21, 20, 21, 0, 0, 0, 0, 0, 0, 82, 82, 82, 82, 82, 82, 82, 82, 83, 83, + 83, 83, 83, 83, 83, 83, 82, 82, 82, 82, 82, 82, 0, 0, 83, 83, 83, 83, + 83, 83, 0, 0, 82, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, + 83, 83, 82, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 83, + 83, 82, 82, 82, 82, 82, 82, 0, 0, 83, 83, 83, 83, 83, 83, 0, 0, 15, + 82, 15, 82, 15, 82, 15, 82, 0, 83, 0, 83, 0, 83, 0, 83, 82, 82, 82, + 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 85, 85, + 85, 85, 86, 86, 87, 87, 88, 88, 89, 89, 0, 0, 82, 82, 82, 82, 82, 82, + 82, 82, 83, 83, 83, 83, 83, 83, 83, 83, 82, 82, 82, 82, 82, 82, 82, + 82, 83, 83, 83, 83, 83, 83, 83, 83, 82, 82, 82, 82, 82, 82, 82, 82, + 83, 83, 83, 83, 83, 83, 83, 83, 82, 82, 15, 90, 15, 0, 15, 15, 83, + 83, 91, 91, 92, 11, 37, 11, 11, 11, 15, 90, 15, 0, 15, 15, 93, 93, + 93, 93, 92, 11, 11, 11, 82, 82, 15, 15, 0, 0, 15, 15, 83, 83, 94, 94, + 0, 11, 11, 11, 82, 82, 15, 15, 15, 95, 15, 15, 83, 83, 96, 96, 97, + 11, 11, 11, 0, 0, 15, 90, 15, 0, 15, 15, 98, 98, 99, 99, 92, 11, 11, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 100, 100, 100, 100, 8, 8, 8, + 8, 8, 8, 3, 3, 16, 18, 5, 16, 16, 18, 5, 16, 3, 3, 3, 3, 3, 3, 3, 3, + 101, 102, 100, 100, 100, 100, 100, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 16, + 18, 3, 3, 3, 3, 12, 12, 3, 3, 3, 7, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 100, 100, 100, 100, 100, 100, 17, 0, 0, 0, 17, 17, 17, 17, + 17, 17, 7, 7, 7, 5, 6, 15, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 7, 7, 7, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 79, 79, 79, + 79, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 37, 14, 14, 14, 14, 37, 14, 14, + 15, 37, 37, 37, 15, 15, 37, 37, 37, 15, 14, 37, 14, 14, 37, 37, 37, + 37, 37, 37, 14, 14, 14, 14, 14, 14, 37, 14, 37, 14, 37, 14, 37, 37, + 37, 37, 15, 15, 37, 37, 14, 37, 15, 39, 39, 39, 39, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 103, 103, 103, 103, + 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 104, 104, + 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, + 105, 105, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 7, 14, 7, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 14, 14, 14, 14, 14, 14, 7, + 7, 7, 7, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 7, 7, 14, 14, 14, 14, 14, 14, 14, 5, 6, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 106, 106, 106, + 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, + 106, 106, 106, 106, 106, 106, 106, 106, 106, 107, 107, 107, 107, 107, + 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, + 107, 107, 107, 107, 107, 107, 107, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 0, 14, 14, 14, 14, 0, 14, 14, 14, 14, 0, 0, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 0, 14, 0, 14, 14, 14, 14, 0, 0, 0, 14, + 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 14, 14, 14, 14, 14, 14, 14, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 2, 3, 3, + 3, 14, 59, 3, 105, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 14, 14, 5, 6, 5, 6, + 5, 6, 5, 6, 8, 5, 6, 6, 14, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 60, 60, 60, 60, 60, 60, 8, 59, 59, 59, 59, 59, 14, 14, 0, 0, 0, + 0, 0, 0, 0, 14, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 0, 0, 0, 0, 60, 60, 59, 59, 59, 59, 0, 0, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 3, 59, 59, 59, 0, 0, 0, 0, 0, 0, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, + 0, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 14, + 14, 17, 17, 17, 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, + 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 0, 0, 0, 14, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, + 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 60, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 7, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 0, 39, 39, 39, 39, 39, 0, 39, 0, 39, 39, 0, 39, + 39, 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 0, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 60, 60, 60, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 8, 8, + 12, 12, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 0, 0, 0, 0, + 3, 3, 3, 3, 12, 12, 12, 3, 3, 3, 0, 3, 3, 3, 3, 8, 5, 6, 5, 6, 5, 6, + 3, 3, 3, 7, 8, 7, 7, 7, 0, 3, 4, 3, 3, 0, 0, 0, 0, 39, 39, 39, 0, 39, + 0, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 0, 0, 100, 0, 3, 3, 3, 4, 3, 3, 3, 5, 6, 3, 7, 3, 8, + 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 3, 7, 7, 7, 3, 3, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 5, 3, 6, 11, 12, 11, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 5, 7, 6, 7, 0, 0, 3, 5, 6, 3, 3, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 59, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 59, + 59, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 0, 0, 0, + 39, 39, 39, 39, 39, 39, 0, 0, 39, 39, 39, 39, 39, 39, 0, 0, 39, 39, + 39, 39, 39, 39, 0, 0, 39, 39, 39, 0, 0, 0, 4, 4, 7, 11, 14, 4, 4, 0, + 7, 7, 7, 7, 7, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, + 0, 0 +}; + +/* + * Each group represents a unique set of character attributes. The attributes + * are encoded into a 32-bit value as follows: + * + * Bits 0-4 Character category: see the constants listed below. + * + * Bits 5-7 Case delta type: 000 = identity + * 010 = add delta for lower + * 011 = add delta for lower, add 1 for title + * 100 = sutract delta for title/upper + * 101 = sub delta for upper, sub 1 for title + * 110 = sub delta for upper, add delta for lower + * + * Bits 8-21 Reserved for future use. + * + * Bits 22-31 Case delta: delta for case conversions. This should be the + * highest field so we can easily sign extend. + */ + +static int groups[] = { + 0, 15, 12, 25, 27, 21, 22, 26, 20, 9, 134217793, 28, 19, 134217858, + 29, 2, 23, 11, 24, -507510654, 4194369, 4194434, -834666431, 973078658, + -507510719, 1258291330, 880803905, 864026689, 859832385, 331350081, + 847249473, 851443777, 868220993, 884998209, 876609601, 893386817, + 897581121, 1, 914358337, 5, 910164033, 918552641, 8388705, 4194499, + 8388770, 331350146, 880803970, 864026754, 859832450, 847249538, + 851443842, 868221058, 876609666, 884998274, 893386882, 897581186, + 914358402, 910164098, 918552706, 4, 6, 159383617, 155189313, 268435521, + 264241217, 159383682, 155189378, 130023554, 268435586, 264241282, + 260046978, 239075458, 197132418, 226492546, 360710274, 335544450, + 335544385, 201326657, 201326722, 7, 8, 247464066, -33554302, -33554367, + -310378366, -360710014, -419430270, -536870782, -469761918, -528482174, + -37748606, -310378431, -37748671, -360710079, -419430335, -29359998, + -469761983, -29360063, -536870847, -528482239, 16, 13, 14, 67108938, + 67109002, 10, 109051997, 109052061, 18, 17 +}; + +/* + * The following constants are used to determine the category of a + * Unicode character. + */ + +#define UNICODE_CATEGORY_MASK 0X1F + +enum { + UNASSIGNED, + UPPERCASE_LETTER, + LOWERCASE_LETTER, + TITLECASE_LETTER, + MODIFIER_LETTER, + OTHER_LETTER, + NON_SPACING_MARK, + ENCLOSING_MARK, + COMBINING_SPACING_MARK, + DECIMAL_DIGIT_NUMBER, + LETTER_NUMBER, + OTHER_NUMBER, + SPACE_SEPARATOR, + LINE_SEPARATOR, + PARAGRAPH_SEPARATOR, + CONTROL, + FORMAT, + PRIVATE_USE, + SURROGATE, + CONNECTOR_PUNCTUATION, + DASH_PUNCTUATION, + OPEN_PUNCTUATION, + CLOSE_PUNCTUATION, + INITIAL_QUOTE_PUNCTUATION, + FINAL_QUOTE_PUNCTUATION, + OTHER_PUNCTUATION, + MATH_SYMBOL, + CURRENCY_SYMBOL, + MODIFIER_SYMBOL, + OTHER_SYMBOL +}; + +/* + * The following macros extract the fields of the character info. The + * GetDelta() macro is complicated because we can't rely on the C compiler + * to do sign extension on right shifts. + */ + +#define GetCaseType(info) (((info) & 0xE0) >> 5) +#define GetCategory(info) ((info) & 0x1F) +#define GetDelta(infO) (((info) > 0) ? ((info) >> 22) : (~(~((info)) >> 22))) + +/* + * This macro extracts the information about a character from the + * Unicode character tables. + */ + +#define GetUniCharInfo(ch) (groups[groupMap[(pageMap[(((int)(ch)) & 0xffff) >> OFFSET_BITS] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))]]) + +#endif /* _TCLUTF */ diff --git a/tests/cmdMZ.test b/tests/cmdMZ.test index 142ab29..6559ce6 100644 --- a/tests/cmdMZ.test +++ b/tests/cmdMZ.test @@ -156,7 +156,7 @@ test cmdMZ-5.1 {Tcl_StringObjCmd: error conditions} { } {1 {wrong # args: should be "string option arg ?arg ...?"}} test cmdMZ-5.2 {Tcl_StringObjCmd: error conditions} { list [catch {string gorp a b} msg] $msg -} {1 {bad option "gorp": must be compare, first, index, last, length, match, range, tolower, toupper, trim, trimleft, trimright, wordend, or wordstart}} +} {1 {bad option "gorp": must be compare, first, index, last, length, match, range, tolower, toupper, totitle, trim, trimleft, trimright, wordend, or wordstart}} test cmdMZ-6.1 {Tcl_StringObjCmd: string compare} { list [catch {string compare a} msg] $msg @@ -372,11 +372,8 @@ test cmdMZ-13.4 {Tcl_StringObjCmd: string tolower} { test cmdMZ-13.5 {Tcl_StringObjCmd: string tolower} { string tolower {123#$&*()} } {123#$&*()} -test cmdMZ-13.6 {Tcl_StringObjCmd: string tolower, unicode} {hasIsoLocale} { - set_iso8859_1_locale - set result [string tolower ABCabc\xc7\xe7] - restore_locale - set result +test cmdMZ-13.6 {Tcl_StringObjCmd: string tolower, unicode} { + string tolower ABCabc\xc7\xe7 } "abcabc\xe7\xe7" test cmdMZ-14.1 {Tcl_StringObjCmd: string toupper} { @@ -394,159 +391,166 @@ test cmdMZ-14.4 {Tcl_StringObjCmd: string toupper} { test cmdMZ-14.5 {Tcl_StringObjCmd: string toupper} { string toupper {123#$&*()} } {123#$&*()} -test cmdMZ-14.6 {Tcl_StringObjCmd: string toupper, unicode} {hasIsoLocale} { - set_iso8859_1_locale - set result [string toupper ABCabc\xc7\xe7] - restore_locale - set result +test cmdMZ-14.6 {Tcl_StringObjCmd: string toupper, unicode} { + string toupper ABCabc\xc7\xe7 } "ABCABC\xc7\xc7" -test cmdMZ-15.1 {Tcl_StringObjCmd: string trim} { +test cmdMZ-15.1 {Tcl_StringObjCmd: string totitle} { + list [catch {string totitle} msg] $msg +} {1 {wrong # args: should be "string totitle string"}} +test cmdMZ-15.2 {Tcl_StringObjCmd: string totitle} { + list [catch {string totitle a b} msg] $msg +} {1 {wrong # args: should be "string totitle string"}} +test cmdMZ-15.3 {Tcl_StringObjCmd: string totitle} { + string totitle abCDEf +} {Abcdef} +test cmdMZ-15.4 {Tcl_StringObjCmd: string totitle} { + string totitle "abc xYz" +} {Abc xyz} +test cmdMZ-15.5 {Tcl_StringObjCmd: string totitle} { + string totitle {123#$&*()} +} {123#$&*()} +test cmdMZ-15.6 {Tcl_StringObjCmd: string totitle, unicode} { + string totitle ABCabc\xc7\xe7 +} "Abcabc\xe7\xe7" +test cmdMZ-15.7 {Tcl_StringObjCmd: string totitle, unicode} { + string totitle \u01f3BCabc\xc7\xe7 +} "\u01f2bcabc\xe7\xe7" + +test cmdMZ-16.1 {Tcl_StringObjCmd: string trim} { list [catch {string trim} msg] $msg } {1 {wrong # args: should be "string trim string ?chars?"}} -test cmdMZ-15.2 {Tcl_StringObjCmd: string trim} { +test cmdMZ-16.2 {Tcl_StringObjCmd: string trim} { list [catch {string trim a b c} msg] $msg } {1 {wrong # args: should be "string trim string ?chars?"}} -test cmdMZ-15.3 {Tcl_StringObjCmd: string trim} { +test cmdMZ-16.3 {Tcl_StringObjCmd: string trim} { string trim " XYZ " } {XYZ} -test cmdMZ-15.4 {Tcl_StringObjCmd: string trim} { +test cmdMZ-16.4 {Tcl_StringObjCmd: string trim} { string trim "\t\nXYZ\t\n\r\n" } {XYZ} -test cmdMZ-15.5 {Tcl_StringObjCmd: string trim} { +test cmdMZ-16.5 {Tcl_StringObjCmd: string trim} { string trim " A XYZ A " } {A XYZ A} -test cmdMZ-15.6 {Tcl_StringObjCmd: string trim} { +test cmdMZ-16.6 {Tcl_StringObjCmd: string trim} { string trim "XXYYZZABC XXYYZZ" ZYX } {ABC } -test cmdMZ-15.7 {Tcl_StringObjCmd: string trim} { +test cmdMZ-16.7 {Tcl_StringObjCmd: string trim} { string trim " \t\r " } {} -test cmdMZ-15.8 {Tcl_StringObjCmd: string trim} { +test cmdMZ-16.8 {Tcl_StringObjCmd: string trim} { string trim {abcdefg} {} } {abcdefg} -test cmdMZ-15.9 {Tcl_StringObjCmd: string trim} { +test cmdMZ-16.9 {Tcl_StringObjCmd: string trim} { string trim {} } {} -test cmdMZ-15.10 {Tcl_StringObjCmd: string trim} { +test cmdMZ-16.10 {Tcl_StringObjCmd: string trim} { string trim ABC DEF } {ABC} -test cmdMZ-15.11 {Tcl_StringObjCmd: string trim, unicode} { +test cmdMZ-16.11 {Tcl_StringObjCmd: string trim, unicode} { string trim "\xe7\xe8 AB\xe7C \xe8\xe7" \xe7\xe8 } " AB\xe7C " -test cmdMZ-16.1 {Tcl_StringObjCmd: string trimleft} { +test cmdMZ-17.1 {Tcl_StringObjCmd: string trimleft} { string trimleft " XYZ " } {XYZ } -test cmdMZ-16.2 {Tcl_StringObjCmd: string trimleft} { +test cmdMZ-17.2 {Tcl_StringObjCmd: string trimleft} { list [catch {string trimleft} msg] $msg } {1 {wrong # args: should be "string trimleft string ?chars?"}} -test cmdMZ-16.3 {Tcl_StringObjCmd: string trimleft} { +test cmdMZ-17.3 {Tcl_StringObjCmd: string trimleft} { string length [string trimleft " "] } {0} -test cmdMZ-17.1 {Tcl_StringObjCmd: string trimright} { +test cmdMZ-18.1 {Tcl_StringObjCmd: string trimright} { string trimright " XYZ " } { XYZ} -test cmdMZ-17.2 {Tcl_StringObjCmd: string trimright} { +test cmdMZ-18.2 {Tcl_StringObjCmd: string trimright} { string trimright " " } {} -test cmdMZ-17.3 {Tcl_StringObjCmd: string trimright} { +test cmdMZ-18.3 {Tcl_StringObjCmd: string trimright} { string trimright "" } {} -test cmdMZ-17.4 {Tcl_StringObjCmd: string trimright errors} { +test cmdMZ-18.4 {Tcl_StringObjCmd: string trimright errors} { list [catch {string trimright} msg] $msg } {1 {wrong # args: should be "string trimright string ?chars?"}} -test cmdMZ-17.5 {Tcl_StringObjCmd: string trimright errors} { +test cmdMZ-18.5 {Tcl_StringObjCmd: string trimright errors} { list [catch {string trimg a} msg] $msg -} {1 {bad option "trimg": must be compare, first, index, last, length, match, range, tolower, toupper, trim, trimleft, trimright, wordend, or wordstart}} +} {1 {bad option "trimg": must be compare, first, index, last, length, match, range, tolower, toupper, totitle, trim, trimleft, trimright, wordend, or wordstart}} -test cmdMZ-18.1 {Tcl_StringObjCmd: string wordend} { +test cmdMZ-19.1 {Tcl_StringObjCmd: string wordend} { list [catch {string wordend a} msg] $msg } {1 {wrong # args: should be "string wordend string index"}} -test cmdMZ-18.2 {Tcl_StringObjCmd: string wordend} { +test cmdMZ-19.2 {Tcl_StringObjCmd: string wordend} { list [catch {string wordend a b c} msg] $msg } {1 {wrong # args: should be "string wordend string index"}} -test cmdMZ-18.3 {Tcl_StringObjCmd: string wordend} { +test cmdMZ-19.3 {Tcl_StringObjCmd: string wordend} { list [catch {string wordend a gorp} msg] $msg } {1 {expected integer but got "gorp"}} -test cmdMZ-18.4 {Tcl_StringObjCmd: string wordend} { +test cmdMZ-19.4 {Tcl_StringObjCmd: string wordend} { string wordend abc. -1 } 3 -test cmdMZ-18.5 {Tcl_StringObjCmd: string wordend} { +test cmdMZ-19.5 {Tcl_StringObjCmd: string wordend} { string wordend abc. 100 } 4 -test cmdMZ-18.6 {Tcl_StringObjCmd: string wordend} { +test cmdMZ-19.6 {Tcl_StringObjCmd: string wordend} { string wordend "word_one two three" 2 } 8 -test cmdMZ-18.7 {Tcl_StringObjCmd: string wordend} { +test cmdMZ-19.7 {Tcl_StringObjCmd: string wordend} { string wordend "one .&# three" 5 } 6 -test cmdMZ-18.8 {Tcl_StringObjCmd: string wordend} { +test cmdMZ-19.8 {Tcl_StringObjCmd: string wordend} { string worde "x.y" 0 } 1 -test cmdMZ-18.9 {Tcl_StringObjCmd: string wordend, unicode} {hasIsoLocale} { - set_iso8859_1_locale - set result [string wordend "xyz\u00c7de fg" 0] - restore_locale - set result +test cmdMZ-19.9 {Tcl_StringObjCmd: string wordend, unicode} { + string wordend "xyz\u00c7de fg" 0 } 6 -test cmdMZ-18.10 {Tcl_StringObjCmd: string wordend, unicode} {hasIsoLocale} { - set_iso8859_1_locale - set result [string wordend "xyz\uc700de fg" 0] - restore_locale - set result -} 3 -test cmdMZ-18.11 {Tcl_StringObjCmd: string wordend, unicode} {hasIsoLocale} { - set_iso8859_1_locale - set result [string wordend "xyz\uc700de fg" 0] - restore_locale - set result +test cmdMZ-19.10 {Tcl_StringObjCmd: string wordend, unicode} { + string wordend "xyz\uc700de fg" 0 +} 6 +test cmdMZ-19.11 {Tcl_StringObjCmd: string wordend, unicode} { + string wordend "xyz\u203fde fg" 0 +} 6 +test cmdMZ-19.12 {Tcl_StringObjCmd: string wordend, unicode} { + string wordend "xyz\u2045de fg" 0 } 3 -test cmdMZ-18.12 {Tcl_StringObjCmd: string wordend, unicode} { +test cmdMZ-19.13 {Tcl_StringObjCmd: string wordend, unicode} { string wordend "\uc700\uc700 abc" 8 } 6 -test cmdMZ-19.1 {Tcl_StringObjCmd: string wordstart} { +test cmdMZ-20.1 {Tcl_StringObjCmd: string wordstart} { list [catch {string word a} msg] $msg -} {1 {ambiguous option "word": must be compare, first, index, last, length, match, range, tolower, toupper, trim, trimleft, trimright, wordend, or wordstart}} -test cmdMZ-19.2 {Tcl_StringObjCmd: string wordstart} { +} {1 {ambiguous option "word": must be compare, first, index, last, length, match, range, tolower, toupper, totitle, trim, trimleft, trimright, wordend, or wordstart}} +test cmdMZ-20.2 {Tcl_StringObjCmd: string wordstart} { list [catch {string wordstart a} msg] $msg } {1 {wrong # args: should be "string wordstart string index"}} -test cmdMZ-19.3 {Tcl_StringObjCmd: string wordstart} { +test cmdMZ-20.3 {Tcl_StringObjCmd: string wordstart} { list [catch {string wordstart a b c} msg] $msg } {1 {wrong # args: should be "string wordstart string index"}} -test cmdMZ-19.4 {Tcl_StringObjCmd: string wordstart} { +test cmdMZ-20.4 {Tcl_StringObjCmd: string wordstart} { list [catch {string wordstart a gorp} msg] $msg } {1 {expected integer but got "gorp"}} -test cmdMZ-19.5 {Tcl_StringObjCmd: string wordstart} { +test cmdMZ-20.5 {Tcl_StringObjCmd: string wordstart} { string wordstart "one two three_words" 400 } 8 -test cmdMZ-19.6 {Tcl_StringObjCmd: string wordstart} { +test cmdMZ-20.6 {Tcl_StringObjCmd: string wordstart} { string wordstart "one two three_words" 2 } 0 -test cmdMZ-19.7 {Tcl_StringObjCmd: string wordstart} { +test cmdMZ-20.7 {Tcl_StringObjCmd: string wordstart} { string wordstart "one two three_words" -2 } 0 -test cmdMZ-19.8 {Tcl_StringObjCmd: string wordstart} { +test cmdMZ-20.8 {Tcl_StringObjCmd: string wordstart} { string wordstart "one .*&^ three" 6 } 6 -test cmdMZ-19.9 {Tcl_StringObjCmd: string wordstart} { +test cmdMZ-20.9 {Tcl_StringObjCmd: string wordstart} { string wordstart "one two three" 4 } 4 -test cmdMZ-19.10 {Tcl_StringObjCmd: string wordstart, unicode} {hasIsoLocale} { - set_iso8859_1_locale - set result [string wordstart "one tw\u00c7o three" 7] - restore_locale - set result +test cmdMZ-20.10 {Tcl_StringObjCmd: string wordstart, unicode} { + string wordstart "one tw\u00c7o three" 7 } 4 -test cmdMZ-19.11 {Tcl_StringObjCmd: string wordstart, unicode} {hasIsoLocale} { - set_iso8859_1_locale - set result [string wordstart "ab\uc700\uc700 cdef ghi" 12] - restore_locale - set result +test cmdMZ-20.11 {Tcl_StringObjCmd: string wordstart, unicode} { + string wordstart "ab\uc700\uc700 cdef ghi" 12 } 10 -test cmdMZ-19.12 {Tcl_StringObjCmd: string wordstart, unicode} { +test cmdMZ-20.12 {Tcl_StringObjCmd: string wordstart, unicode} { string wordstart "\uc700\uc700 abc" 8 } 3 @@ -557,3 +561,4 @@ test cmdMZ-19.12 {Tcl_StringObjCmd: string wordstart, unicode} { # The tests for Tcl_WhileObjCmd are in while.test return + diff --git a/tests/utf.test b/tests/utf.test index 234048b..f7e5922 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -161,37 +161,107 @@ bsCheck \ua1 161 bsCheck \u4e21 63 test utf-11.1 {Tcl_UtfToUpper} { + string toupper {} +} {} +test utf-11.2 {Tcl_UtfToUpper} { string toupper abc } ABC -test utf-11.2 {Tcl_UtfToUpper} {hasIsoLocale} { - set_iso8859_1_locale - set result [string toupper \u00e3ab] - restore_locale - set result +test utf-11.3 {Tcl_UtfToUpper} { + string toupper \u00e3ab } \u00c3AB -test utf-11.3 {Tcl_UtfToUpper} {hasIsoLocale} { - set_iso8859_1_locale - set result [string toupper \u01e3ab] - restore_locale - set result -} \u01e3AB +test utf-11.4 {Tcl_UtfToUpper} { + string toupper \u01e3ab +} \u01e2AB test utf-12.1 {Tcl_UtfToLower} { + string tolower {} +} {} +test utf-12.2 {Tcl_UtfToLower} { string tolower ABC } abc -test utf-12.2 {Tcl_UtfToLower} {hasIsoLocale} { - set_iso8859_1_locale - set result [string tolower \u00c3AB] - restore_locale - set result +test utf-12.3 {Tcl_UtfToLower} { + string tolower \u00c3AB } \u00e3ab -test utf-12.3 {Tcl_UtfToLower} {hasIsoLocale} { - set_iso8859_1_locale - set result [string tolower \u01c3AB] - restore_locale - set result -} \u01c3ab +test utf-12.4 {Tcl_UtfToLower} { + string tolower \u01e2AB +} \u01e3ab + +test utf-13.1 {Tcl_UtfToTitle} { + string totitle {} +} {} +test utf-13.2 {Tcl_UtfToTitle} { + string totitle abc +} Abc +test utf-13.3 {Tcl_UtfToTitle} { + string totitle \u00e3ab +} \u00c3ab +test utf-13.4 {Tcl_UtfToTitle} { + string totitle \u01f3ab +} \u01f2ab + +test utf-14.1 {Tcl_UniCharToUpper, negative delta} { + string toupper aA +} AA +test utf-14.2 {Tcl_UniCharToUpper, positive delta} { + string toupper \u0178\u00ff +} \u0178\u0178 +test utf-14.3 {Tcl_UniCharToUpper, no delta} { + string toupper ! +} ! +test utf-15.1 {Tcl_UniCharToLower, negative delta} { + string tolower aA +} aa +test utf-15.2 {Tcl_UniCharToLower, positive delta} { + string tolower \u0178\u00ff +} \u00ff\u00ff +test utf-16.1 {Tcl_UniCharToLower, no delta} { + string tolower ! +} ! + +test utf-17.1 {Tcl_UniCharToTitle, add one for title} { + string totitle \u01c4 +} \u01c5 +test utf-17.2 {Tcl_UniCharToTitle, subtract one for title} { + string totitle \u01c6 +} \u01c5 +test utf-17.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} { + string totitle \u017f +} \u0053 +test utf-17.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} { + string totitle \u00ff +} \u0178 +test utf-17.5 {Tcl_UniCharToTitle, no delta} { + string totitle ! +} ! + +test utf-18.1 {TclUniCharLen} { + list [regexp \\d abc456def foo] $foo +} {1 4} + +test utf-19.1 {TclUniCharNcmp} { +} {} + +test utf-20.1 {TclUniCharIsAlnum} { +} {} + +test utf-21.1 {TclUniCharIsWordChar} { + string wordend "xyz123_bar fg" 0 +} 10 +test utf-21.1 {TclUniCharIsWordChar} { + string wordend "x\u5080z123_bar\u203c fg" 0 +} 10 + +test utf-22.1 {TclUniCharIsAlpha} { +} {} + +test utf-23.1 {TclUniCharIsDigit} { +} {} + +test utf-23.1 {TclUniCharIsSpace} { +} {} return + + diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl new file mode 100644 index 0000000..4ea97a9 --- /dev/null +++ b/tools/uniParse.tcl @@ -0,0 +1,364 @@ +namespace eval uni { + set shift 9 ;# number of bits of data within a page + variable pMap ;# map from page to page index, each entry is + # an index into the pages table, indexed by + # page number + variable pages ;# map from page index to page info, each + # entry is a list of indices into the groups + # table, the list is indexed by the offset + variable groups ;# list of character info values, indexed by + # group number, initialized with the + # unassigned character group + + variable categories { + Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp + Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So + } ;# Ordered list of character categories, must + # match the enumeration in the header file. + +} + +proc uni::getValue {items index} { + variable categories + + # Extract character info + + set category [lindex $items 2] + if {[scan [lindex $items 12] %4x toupper] == 1} { + set toupper [expr {$index - $toupper}] + } else { + set toupper {} + } + if {[scan [lindex $items 13] %4x tolower] == 1} { + set tolower [expr {$tolower - $index}] + } else { + set tolower {} + } + if {[scan [lindex $items 14] %4x totitle] == 1} { + set totitle [expr {$index - $totitle}] + } else { + set totitle {} + } + + set categoryIndex [lsearch -exact $categories $category] + if {$categoryIndex < 0} { + puts "Unexpected character category: $index($category)" + set categoryIndex 0 + } + + return "$categoryIndex,$toupper,$tolower,$totitle" +} + +proc uni::getGroup {value} { + variable groups + + set gIndex [lsearch -exact $groups $value] + if {$gIndex == -1} { + set gIndex [llength $groups] + lappend groups $value + } + return $gIndex +} + +proc uni::addPage {info} { + variable pMap + variable pages + + set pIndex [lsearch -exact $pages $info] + if {$pIndex == -1} { + set pIndex [llength $pages] + lappend pages $info + } + lappend pMap $pIndex + return +} + +proc uni::buildTables {data} { + variable shift + + variable pMap {} + variable pages {} + variable groups {{0,,,}} + set info {} ;# temporary page info + + set mask [expr {(1 << $shift) - 1}] + + set next 0 + + foreach line [split $data \n] { + if {$line == ""} { + set line "FFFF;;Cn;0;ON;;;;;N;;;;;\n" + } + + set items [split $line \;] + + scan [lindex $items 0] %4x index + set index [format 0x%0.4x $index] + + set gIndex [getGroup [getValue $items $index]] + + # Since the input table omits unassigned characters, these will + # show up as gaps in the index sequence. There are a few special cases + # where the gaps correspond to a uniform block of assigned characters. + # These are indicated as such in the character name. + + # Enter all unassigned characters up to the current character. + if {($index > $next) \ + && ![regexp "Last>$" [lindex $items 1]]} { + for {} {$next < $index} {incr next} { + lappend info 0 + if {($next & $mask) == $mask} { + addPage $info + set info {} + } + } + } + + # Enter all assigned characters up to the current character + for {set i $next} {$i <= $index} {incr i} { + # Split character index into offset and page number + set offset [expr {$i & $mask}] + set page [expr {($i >> $shift)}] + + # Add the group index to the info for the current page + lappend info $gIndex + + # If this is the last entry in the page, add the page + if {$offset == $mask} { + addPage $info + set info {} + } + } + set next [expr {$index + 1}] + } + return +} + +proc uni::main {} { + global argc argv0 argv + variable pMap + variable pages + variable groups + variable shift + + if {$argc != 2 && $argc != 3} { + puts stderr "\nusage: $argv0 ?optimize?\n" + exit 1 + } + set f [open [lindex $argv 0] r] + set data [read $f] + close $f + + set shift 6 + buildTables $data + puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]" + set size [expr {[llength $pMap] + [llength $pages]*(1<<$shift)}] + puts "shift = 6, space = $size" + + set f [open [lindex $argv 1] w] + puts $f "/* + * tclUtf.h -- + * + * Declarations of Unicode character information tables. This file is + * automatically generated by the tools/uniParse.tcl script. Do not + * modify this file by hand. + * + * Copyright (c) 1998 by Scriptics Corporation. + * All rights reserved. + * + * RCS: @(#) \$Id\$ + */ + +#ifndef _TCLUTF +#define _TCLUTF + +/* + * A 16-bit Unicode character is split into two parts in order to index + * into the following tables. The lower OFFSET_BITS comprise an offset + * into a page of characters. The upper bits comprise the page number. + */ + +#define OFFSET_BITS $shift + +/* + * The pageMap is indexed by page number and returns an alternate page number + * that identifies a unique page of characters. Many Unicode characters map + * to the same alternate page number. + */ + +static char pageMap\[\] = {" + set line " " + set last [expr {[llength $pMap] - 1}] + for {set i 0} {$i <= $last} {incr i} { + append line [lindex $pMap $i] + if {$i != $last} { + append line ", " + } + if {[string length $line] > 70} { + puts $f $line + set line " " + } + } + puts $f $line + puts $f "}; + +/* + * The groupMap is indexed by combining the alternate page number with + * the page offset and returns a group number that identifies a unique + * set of character attributes. + */ + +static char groupMap\[\] = {" + set line " " + set lasti [expr {[llength $pages] - 1}] + for {set i 0} {$i <= $lasti} {incr i} { + set page [lindex $pages $i] + set lastj [expr {[llength $page] - 1}] + for {set j 0} {$j <= $lastj} {incr j} { + append line [lindex $page $j] + if {$j != $lastj || $i != $lasti} { + append line ", " + } + if {[string length $line] > 70} { + puts $f $line + set line " " + } + } + } + puts $f $line + puts $f "}; + +/* + * Each group represents a unique set of character attributes. The attributes + * are encoded into a 32-bit value as follows: + * + * Bits 0-4 Character category: see the constants listed below. + * + * Bits 5-7 Case delta type: 000 = identity + * 010 = add delta for lower + * 011 = add delta for lower, add 1 for title + * 100 = sutract delta for title/upper + * 101 = sub delta for upper, sub 1 for title + * 110 = sub delta for upper, add delta for lower + * + * Bits 8-21 Reserved for future use. + * + * Bits 22-31 Case delta: delta for case conversions. This should be the + * highest field so we can easily sign extend. + */ + +static int groups\[\] = {" + set line " " + set last [expr {[llength $groups] - 1}] + for {set i 0} {$i <= $last} {incr i} { + foreach {type toupper tolower totitle} [split [lindex $groups $i] ,] {} + + # Compute the case conversion type and delta + + if {$totitle != ""} { + if {$totitle == $toupper} { + # subtract delta for title or upper + set case 4 + set delta $toupper + } elseif {$toupper != ""} { + # subtract delta for upper, subtract 1 for title + set case 5 + set delta $toupper + } else { + # add delta for lower, add 1 for title + set case 3 + set delta $tolower + } + } elseif {$toupper != ""} { + # subtract delta for upper, add delta for lower + set case 6 + set delta $toupper + } elseif {$tolower != ""} { + # add delta for lower + set case 2 + set delta $tolower + } else { + # noop + set case 0 + set delta 0 + } + + set val [expr {($delta << 22) | ($case << 5) | $type}] + + append line [format "%d" $val] + if {$i != $last} { + append line ", " + } + if {[string length $line] > 65} { + puts $f $line + set line " " + } + } + puts $f $line + puts $f "}; + +/* + * The following constants are used to determine the category of a + * Unicode character. + */ + +#define UNICODE_CATEGORY_MASK 0X1F + +enum { + UNASSIGNED, + UPPERCASE_LETTER, + LOWERCASE_LETTER, + TITLECASE_LETTER, + MODIFIER_LETTER, + OTHER_LETTER, + NON_SPACING_MARK, + ENCLOSING_MARK, + COMBINING_SPACING_MARK, + DECIMAL_DIGIT_NUMBER, + LETTER_NUMBER, + OTHER_NUMBER, + SPACE_SEPARATOR, + LINE_SEPARATOR, + PARAGRAPH_SEPARATOR, + CONTROL, + FORMAT, + PRIVATE_USE, + SURROGATE, + CONNECTOR_PUNCTUATION, + DASH_PUNCTUATION, + OPEN_PUNCTUATION, + CLOSE_PUNCTUATION, + INITIAL_QUOTE_PUNCTUATION, + FINAL_QUOTE_PUNCTUATION, + OTHER_PUNCTUATION, + MATH_SYMBOL, + CURRENCY_SYMBOL, + MODIFIER_SYMBOL, + OTHER_SYMBOL +}; + +/* + * The following macros extract the fields of the character info. The + * GetDelta() macro is complicated because we can't rely on the C compiler + * to do sign extension on right shifts. + */ + +#define GetCaseType(info) (((info) & 0xE0) >> 5) +#define GetCategory(info) ((info) & 0x1F) +#define GetDelta(infO) (((info) > 0) ? ((info) >> 22) : (~(~((info)) >> 22))) + +/* + * This macro extracts the information about a character from the + * Unicode character tables. + */ + +#define GetUniCharInfo(ch) (groups\[groupMap\[(pageMap\[(((int)(ch)) & 0xffff) >> OFFSET_BITS\] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) + +#endif /* _TCLUTF */" + + close $f +} + +uni::main + +return -- cgit v0.12