diff options
Diffstat (limited to 'generic/locale.c')
-rw-r--r-- | generic/locale.c | 675 |
1 files changed, 0 insertions, 675 deletions
diff --git a/generic/locale.c b/generic/locale.c deleted file mode 100644 index ca56fc4..0000000 --- a/generic/locale.c +++ /dev/null @@ -1,675 +0,0 @@ -/* - * locale.c -- - * - * Regexp package file: - * collating-element handling and other locale-specific stuff - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: locale.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $ - */ - -/* - * This is largely dummy code, since it needs locale interfaces. The - * dummy code implements more or less the C locale. Parts of the code - * are marked "dummy" and "generic" in hopes of making the situation - * clearer. - * - * As a hack for testing, if REG_FAKE is turned on, we add a single - * collating element ch between c and d, and a single equivalence class - * containing x and y. - * - * The type "celt" is an entirely opaque non-array type -- it need not be - * an integer type, it could be (say) a pointer -- which has distinct values - * for all chrs and all collating elements. The only things the outside - * world does to celts are copying them around and comparing them for - * equality; everything else is done in this file. There need be no "null" - * value for celt. The dummy code uses wint_t as celt, with WEOF as the - * celt code for ch (ugh!). - */ - -/* - * dummy: - ^ #def MAXCE 2 // longest CE code is prepared to handle - ^ typedef wint_t celt; // type holding distinct codes for all chrs, all CEs - */ - -/* dummy: character-name table */ -static struct cname { - char *name; - char code; -} cnames[] = { - {"NUL", '\0'}, - {"SOH", '\001'}, - {"STX", '\002'}, - {"ETX", '\003'}, - {"EOT", '\004'}, - {"ENQ", '\005'}, - {"ACK", '\006'}, - {"BEL", '\007'}, - {"alert", '\007'}, - {"BS", '\010'}, - {"backspace", '\b'}, - {"HT", '\011'}, - {"tab", '\t'}, - {"LF", '\012'}, - {"newline", '\n'}, - {"VT", '\013'}, - {"vertical-tab", '\v'}, - {"FF", '\014'}, - {"form-feed", '\f'}, - {"CR", '\015'}, - {"carriage-return", '\r'}, - {"SO", '\016'}, - {"SI", '\017'}, - {"DLE", '\020'}, - {"DC1", '\021'}, - {"DC2", '\022'}, - {"DC3", '\023'}, - {"DC4", '\024'}, - {"NAK", '\025'}, - {"SYN", '\026'}, - {"ETB", '\027'}, - {"CAN", '\030'}, - {"EM", '\031'}, - {"SUB", '\032'}, - {"ESC", '\033'}, - {"IS4", '\034'}, - {"FS", '\034'}, - {"IS3", '\035'}, - {"GS", '\035'}, - {"IS2", '\036'}, - {"RS", '\036'}, - {"IS1", '\037'}, - {"US", '\037'}, - {"space", ' '}, - {"exclamation-mark", '!'}, - {"quotation-mark", '"'}, - {"number-sign", '#'}, - {"dollar-sign", '$'}, - {"percent-sign", '%'}, - {"ampersand", '&'}, - {"apostrophe", '\''}, - {"left-parenthesis", '('}, - {"right-parenthesis", ')'}, - {"asterisk", '*'}, - {"plus-sign", '+'}, - {"comma", ','}, - {"hyphen", '-'}, - {"hyphen-minus", '-'}, - {"period", '.'}, - {"full-stop", '.'}, - {"slash", '/'}, - {"solidus", '/'}, - {"zero", '0'}, - {"one", '1'}, - {"two", '2'}, - {"three", '3'}, - {"four", '4'}, - {"five", '5'}, - {"six", '6'}, - {"seven", '7'}, - {"eight", '8'}, - {"nine", '9'}, - {"colon", ':'}, - {"semicolon", ';'}, - {"less-than-sign", '<'}, - {"equals-sign", '='}, - {"greater-than-sign", '>'}, - {"question-mark", '?'}, - {"commercial-at", '@'}, - {"left-square-bracket", '['}, - {"backslash", '\\'}, - {"reverse-solidus", '\\'}, - {"right-square-bracket", ']'}, - {"circumflex", '^'}, - {"circumflex-accent", '^'}, - {"underscore", '_'}, - {"low-line", '_'}, - {"grave-accent", '`'}, - {"left-brace", '{'}, - {"left-curly-bracket", '{'}, - {"vertical-line", '|'}, - {"right-brace", '}'}, - {"right-curly-bracket", '}'}, - {"tilde", '~'}, - {"DEL", '\177'}, - {NULL, 0} -}; - -/* dummy: character-class table */ -static struct cclass { - char *name; - char *chars; - int hasch; -} cclasses[] = { - {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789", 1}, - {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - 1}, - {"blank", " \t", 0}, - {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", 0}, - {"digit", "0123456789", 0}, - {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - 1}, - {"lower", "abcdefghijklmnopqrstuvwxyz", - 1}, - {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - 1}, - {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - 0}, - {"space", "\t\n\v\f\r ", 0}, - {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - 0}, - {"xdigit", "0123456789ABCDEFabcdef", - 0}, - {NULL, 0, 0} -}; - -#define CH WEOF /* dummy */ - -/* - - nces - how many distinct collating elements are there? - * This is pure dummy code, although a straight "return 0" is definitely - * what's wanted for all locales lucky enough not to have these stupid - * things. Case counterparts should be included. - ^ static int nces(struct vars *); - */ -static int -nces(v) -struct vars *v; -{ - return (v->cflags®_FAKE) ? 1 : 0; -} - -/* - - nleaders - how many chrs can be first chrs of collating elements? - * This is pure dummy code, although a straight "return 0" is definitely - * what's wanted for all locales lucky enough not to have these stupid - * things. Case counterparts should be included. - ^ static int nleaders(struct vars *); - */ -static int -nleaders(v) -struct vars *v; -{ - return (v->cflags®_FAKE) ? 1 : 0; -} - -/* - - allces - return a cvec with all the collating elements of the locale - * This would be kind of costly if there were large numbers of them; with - * any luck, that case does not occur in reality. Note that case variants - * should be included; "all" means *all*. - * This is pure dummy code. - ^ static struct cvec *allces(struct vars *, struct cvec *); - */ -static struct cvec * -allces(v, cv) -struct vars *v; -struct cvec *cv; /* this is supposed to have enough room */ -{ - assert(cv->cespace > 0); - (VOID) clearcvec(cv); - if (v->cflags®_FAKE) - addce(cv, ch()); - return cv; -} - -/* - - element - map collating-element name to celt - ^ static celt element(struct vars *, chr *, chr *); - */ -static celt -element(v, startp, endp) -struct vars *v; -chr *startp; /* points to start of name */ -chr *endp; /* points just past end of name */ -{ - register struct cname *cn; - register size_t len; - Tcl_DString ds; - char *name; - - /* generic: one-chr names stand for themselves */ - assert(startp < endp); - len = endp - startp; - if (len == 1) - return *startp; - - NOTE(REG_ULOCALE); - - /* - * INTL: ISO only, search table - */ - - Tcl_DStringInit(&ds); - name = TclUniCharToUtfDString(startp, (int) len, &ds); - - for (cn = cnames; cn->name != NULL; cn++) { - if (strlen(cn->name) == len && strncmp(cn->name, name, len) == 0) { - return UCHAR(cn->code); - } - } - Tcl_DStringFree(&ds); - - /* - * Special case for testing. - */ - - if ((v->cflags®_FAKE) && len == 2) { - if (*startp == 'c' && *(startp+1) == 'h') - return (celt) CH; - } - - /* generic: couldn't find it */ - ERR(REG_ECOLLATE); - return 0; -} - -/* - - range - supply cvec for a range, including legality check - * Must include case counterparts on request. - ^ static struct cvec *range(struct vars *, celt, celt, int); - */ -static struct cvec * -range(v, a, b, cases) -struct vars *v; -celt a; -celt b; /* might equal a */ -int cases; /* case-independent? */ -{ - int nchrs; - int appendch; - struct cvec *cv; - celt c; - - /* generic: legality check */ - if (a != b && !before(a, b)) { - ERR(REG_ERANGE); - return NULL; - } - - /* mostly dummy: compute vector length, note presence of ch */ - appendch = 0; - if (a == (celt) CH) { - if (b == (celt) CH) { - a = 'c'; - b = a - 1; /* kludge to get no chrs */ - appendch = 1; - } else { - a = 'd'; - appendch = 1; - } - } else { - if (b == CH) { - appendch = 1; - b = 'c'; - } else { - if ((v->cflags®_FAKE) && a <= 'c' && b >= 'd') - appendch = 1; - } - } - nchrs = b - a + 1; - if (cases) - nchrs *= 2; - cv = getcvec(v, nchrs, appendch); - NOERRN(); - - /* mostly dummy: fill in vector */ - for (c = a; c <= b; c++) { - addchr(cv, c); - if (cases) { - if (TclUniCharIsUpper((Tcl_UniChar)c)) - addchr(cv, (chr)Tcl_UniCharToLower( - (Tcl_UniChar)c)); - else if (TclUniCharIsLower((Tcl_UniChar)c)) - addchr(cv, (chr)Tcl_UniCharToUpper( - (Tcl_UniChar)c)); - } - } - if (appendch) - addce(cv, ch()); - - return cv; -} - -/* - - before - is celt x before celt y, for purposes of range legality? - * This is all dummy code. - ^ static int before(celt, celt); - */ -static int /* predicate */ -before(x, y) -celt x; -celt y; -{ - int isxch = (x == CH); - int isych = (y == CH); - - if (!isxch && !isych && x < y) - return 1; - if (isxch && !isych && y >= 'd') - return 1; - if (!isxch && isych && x <= 'c') - return 1; - return 0; -} - -/* - - eclass - supply cvec for an equivalence class - * Must include case counterparts on request. - * This is all dummy code. - ^ static struct cvec *eclass(struct vars *, celt, int); - */ -static struct cvec * -eclass(v, c, cases) -struct vars *v; -celt c; -int cases; /* all cases? */ -{ - struct cvec *cv; - - if (c == CH) { - cv = getcvec(v, 0, 1); - assert(cv != NULL); - addce(cv, ch()); - return cv; - } - - if ((v->cflags®_FAKE) && (c == 'x' || c == 'y')) { - cv = getcvec(v, 4, 0); - assert(cv != NULL); - addchr(cv, (chr)'x'); - addchr(cv, (chr)'y'); - if (cases) { - addchr(cv, (chr)'X'); - addchr(cv, (chr)'Y'); - } - return cv; - } - - /* no equivalence class by that name */ - if (cases) - return allcases(v, c); - cv = getcvec(v, 1, 0); - assert(cv != NULL); - addchr(cv, (chr)c); - return cv; -} - -/* - - cclass - supply cvec for a character class - * Must include case counterparts on request. - * This is all dummy code. - ^ static struct cvec *cclass(struct vars *, chr *, chr *, int); - */ -static struct cvec * -cclass(v, startp, endp, cases) -struct vars *v; -chr *startp; /* where the name starts */ -chr *endp; /* just past the end of the name */ -int cases; /* case-independent? */ -{ - size_t len; - register char *p; - register struct cclass *cc; - int hasch; - struct cvec *cv; - Tcl_DString ds; - char *name; - - /* check out the name */ - len = endp - startp; - - Tcl_DStringInit(&ds); - name = TclUniCharToUtfDString(startp, (int) len, &ds); - - if (cases && len == 5 && (strncmp("lower", name, 5) == 0 || - strncmp("upper", name, 5) == 0)) - name = "alpha"; - for (cc = cclasses; cc->name != NULL; cc++) { - if (strlen(cc->name) == len && strncmp(cc->name, name, len) == 0) { - break; - } - } - Tcl_DStringFree(&ds); - - if (cc->name == NULL) { - ERR(REG_ECTYPE); - return NULL; - } - - /* set up vector */ - hasch = (v->cflags®_FAKE) ? cc->hasch : 0; - cv = getcvec(v, (int) strlen(cc->chars), hasch); - if (cv == NULL) { - ERR(REG_ESPACE); - return NULL; - } - - /* fill it in */ - for (p = cc->chars; *p != '\0'; p++) - addchr(cv, (chr)*p); - if (hasch) - addce(cv, ch()); - - return cv; -} - -/* - - allcases - supply cvec for all case counterparts of a chr (including itself) - * This is a shortcut, preferably an efficient one, for simple characters; - * messy cases are done via range(). - * This is all dummy code. - ^ static struct cvec *allcases(struct vars *, pchr); - */ -static struct cvec * -allcases(v, c) -struct vars *v; -pchr c; -{ - struct cvec *cv = getcvec(v, 2, 0); - - assert(cv != NULL); - addchr(cv, c); - if (TclUniCharIsUpper((Tcl_UniChar)c)) - addchr(cv, (chr)Tcl_UniCharToLower((Tcl_UniChar)c)); - else if (TclUniCharIsLower((Tcl_UniChar)c)) - addchr(cv, (chr)Tcl_UniCharToUpper((Tcl_UniChar)c)); - - return cv; -} - -/* - - sncmp - case-independent chr-string compare - * REG_ICASE backrefs need this. It should preferably be efficient. - * This is all dummy code. - ^ static int sncmp(CONST chr *, CONST chr *, size_t); - */ -static int /* -1, 0, 1 for <, =, > */ -sncmp(x, y, len) -CONST chr *x; -CONST chr *y; -size_t len; /* maximum length of comparison */ -{ - int diff; - size_t i; - - for (i = 0; i < len; i++) { - diff = Tcl_UniCharToLower(x[i]) - Tcl_UniCharToLower(y[i]); - if (diff) { - return diff; - } - } - return 0; -} - -/* - * Utility functions for handling cvecs - */ - -/* - - newcvec - allocate a new cvec - ^ static struct cvec *newcvec(int, int); - */ -static struct cvec * -newcvec(nchrs, nces) -int nchrs; /* to hold this many chrs... */ -int nces; /* ... and this many CEs */ -{ - size_t n; - size_t nc; - struct cvec *cv; - - nc = (size_t)nchrs + (size_t)nces*(MAXCE+1); - n = sizeof(struct cvec) + (size_t)(nces-1)*sizeof(chr *) + - nc*sizeof(chr); - cv = (struct cvec *)ckalloc(n); - if (cv == NULL) - return NULL; - cv->chrspace = nc; - cv->chrs = (chr *)&cv->ces[nces]; /* chrs just after CE ptrs */ - cv->cespace = nces; - return clearcvec(cv); -} - -/* - - clearcvec - clear a possibly-new cvec - * Returns pointer as convenience. - ^ static struct cvec *clearcvec(struct cvec *); - */ -static struct cvec * -clearcvec(cv) -struct cvec *cv; -{ - int i; - - assert(cv != NULL); - cv->nchrs = 0; - assert(cv->chrs == (chr *)&cv->ces[cv->cespace]); - cv->nces = 0; - cv->ncechrs = 0; - for (i = 0; i < cv->cespace; i++) - cv->ces[i] = NULL; - - return cv; -} - -/* - - addchr - add a chr to a cvec - ^ static VOID addchr(struct cvec *, pchr); - */ -static VOID -addchr(cv, c) -struct cvec *cv; -pchr c; -{ - assert(cv->nchrs < cv->chrspace - cv->ncechrs); - cv->chrs[cv->nchrs++] = (chr) c; -} - -/* - - addce - add a CE to a cvec - ^ static VOID addce(struct cvec *, chr *); - */ -static VOID -addce(cv, startp) -struct cvec *cv; -chr *startp; /* 0-terminated text */ -{ - int n = wcslen(startp); - int i; - chr *s; - chr *d; - - assert(n > 0); - assert(cv->nchrs + n < cv->chrspace - cv->ncechrs); - assert(cv->nces < cv->cespace); - d = &cv->chrs[cv->chrspace - cv->ncechrs - n - 1]; - cv->ces[cv->nces++] = d; - for (s = startp, i = n; i > 0; s++, i--) - *d++ = *s; - *d = 0; /* endmarker */ - assert(d == &cv->chrs[cv->chrspace - cv->ncechrs]); - cv->ncechrs += n + 1; -} - -/* - - haschr - does a cvec contain this chr? - ^ static int haschr(struct cvec *, pchr); - */ -static int /* predicate */ -haschr(cv, c) -struct cvec *cv; -pchr c; -{ - int i; - chr *p; - - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) - if (*p == c) - return 1; - return 0; -} - -/* - - getcvec - get a cvec, remembering it as v->cv - ^ static struct cvec *getcvec(struct vars *, int, int); - */ -static struct cvec * -getcvec(v, nchrs, nces) -struct vars *v; -int nchrs; /* to hold this many chrs... */ -int nces; /* ... and this many CEs */ -{ - if (v->cv != NULL && nchrs <= v->cv->chrspace && nces <= v->cv->cespace) - return clearcvec(v->cv); - - if (v->cv != NULL) - freecvec(v->cv); - v->cv = newcvec(nchrs, nces); - if (v->cv == NULL) - ERR(REG_ESPACE); - - return v->cv; -} - -/* - - freecvec - free a cvec - ^ static VOID freecvec(struct cvec *); - */ -static VOID -freecvec(cv) -struct cvec *cv; -{ - ckfree((char *)cv); -} |