diff options
Diffstat (limited to 'Modules/cjkcodecs/iso2022common.h')
-rw-r--r-- | Modules/cjkcodecs/iso2022common.h | 256 |
1 files changed, 0 insertions, 256 deletions
diff --git a/Modules/cjkcodecs/iso2022common.h b/Modules/cjkcodecs/iso2022common.h deleted file mode 100644 index e042d80..0000000 --- a/Modules/cjkcodecs/iso2022common.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - * iso2022common.h: Common Codec Routines for ISO-2022 codecs. - * - * Written by Hye-Shik Chang <perky@FreeBSD.org> - * $CJKCodecs: iso2022common.h,v 1.8 2003/12/31 05:46:55 perky Exp $ - */ - -/* This ISO-2022 implementation is intended to comply ECMA-43 Level 1 - * rather than RFCs itself */ - -#define ESC 0x1b -#define SO 0x0e -#define SI 0x0f - -#define MAX_ESCSEQLEN 16 - -#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@') -#define IS_ISO2022ESC(c2) ((c2) == '(' || (c2) == ')' || (c2) == '$' || \ - (c2) == '.' || (c2) == '&') - /* this is not a full list of ISO-2022 escape sequence headers. - * but, it's enough to implement CJK instances of iso-2022. */ - -/* STATE - - state->c[0-3] - - 00000000 - ||^^^^^| - |+-----+---- G0-3 Character Set - +----------- Is G0-3 double byte? - - state->c[4] - - 00000000 - || - |+---- Locked-Shift? - +----- ESC Throughout -*/ - -#define CHARSET_DOUBLEBYTE 0x80 - -#define CHARSET_ASCII 'B' - -#define CHARSET_ISO8859_1 'A' -#define CHARSET_ISO8859_7 'F' - -#define CHARSET_KSX1001 ('C'|CHARSET_DOUBLEBYTE) - -#define CHARSET_JISX0201_R 'J' -#define CHARSET_JISX0201_K 'I' -#define CHARSET_JISX0208 ('B'|CHARSET_DOUBLEBYTE) -#define CHARSET_JISX0208_O ('@'|CHARSET_DOUBLEBYTE) -#define CHARSET_JISX0212 ('D'|CHARSET_DOUBLEBYTE) -#define CHARSET_JISX0213_1 ('O'|CHARSET_DOUBLEBYTE) -#define CHARSET_JISX0213_2 ('P'|CHARSET_DOUBLEBYTE) - -#define CHARSET_GB2312 ('A'|CHARSET_DOUBLEBYTE) -#define CHARSET_GB2312_8565 ('E'|CHARSET_DOUBLEBYTE) - -#define CHARSET_DESIGN(c) ((c) & 0x7f) -#define CHARSET_ISDBCS(c) ((c) & 0x80) - -#define F_SHIFTED 0x01 -#define F_ESCTHROUGHOUT 0x02 - -#define STATE_SETG(dn, s, v) ((s)->c[dn]) = (v); -#define STATE_GETG(dn, s) ((s)->c[dn]) - -#define STATE_SETG0(s, v) STATE_SETG(0, s, v) -#define STATE_GETG0(s) STATE_GETG(0, s) -#define STATE_SETG1(s, v) STATE_SETG(1, s, v) -#define STATE_GETG1(s) STATE_GETG(1, s) -#define STATE_SETG2(s, v) STATE_SETG(2, s, v) -#define STATE_GETG2(s) STATE_GETG(2, s) -#define STATE_SETG3(s, v) STATE_SETG(3, s, v) -#define STATE_GETG3(s) STATE_GETG(3, s) - -#define STATE_SETFLAG(s, f) ((s)->c[4]) |= (f); -#define STATE_GETFLAG(s, f) ((s)->c[4] & (f)) -#define STATE_CLEARFLAG(s, f) ((s)->c[4]) &= ~(f); -#define STATE_CLEARFLAGS(s) ((s)->c[4]) = 0; - -#define ISO2022_GETCHARSET(charset, c1) \ - if ((c) >= 0x80) \ - return 1; \ - if (STATE_GETFLAG(state, F_SHIFTED)) /* G1 */ \ - (charset) = STATE_GETG1(state); \ - else /* G1 */ \ - (charset) = STATE_GETG0(state); \ - -#ifdef ISO2022_USE_G2_DESIGNATION -/* hardcoded for iso-2022-jp-2 for now. we'll need to generalize it - when we have more G2 designating encodings */ -#define SS2_ROUTINE \ - if (IN2 == 'N') { /* SS2 */ \ - RESERVE_INBUF(3) \ - if (STATE_GETG2(state) == CHARSET_ISO8859_1) { \ - ISO8859_1_DECODE(IN3 ^ 0x80, **outbuf) \ - else return 3; \ - } else if (STATE_GETG2(state) == CHARSET_ISO8859_7) { \ - ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf) \ - else return 3; \ - } else if (STATE_GETG2(state) == CHARSET_ASCII) { \ - if (IN3 & 0x80) return 3; \ - else **outbuf = IN3; \ - } else \ - return MBERR_INTERNAL; \ - NEXT(3, 1) \ - } else -#else -#define SS2_ROUTINE -#endif - -#ifndef ISO2022_NO_SHIFT -#define SHIFT_CASES \ - case SI: \ - STATE_CLEARFLAG(state, F_SHIFTED) \ - NEXT_IN(1) \ - break; \ - case SO: \ - STATE_SETFLAG(state, F_SHIFTED) \ - NEXT_IN(1) \ - break; -#else -/* for compatibility with JapaneseCodecs */ -#define SHIFT_CASES -#endif - -#define ISO2022_BASECASES(c1) \ - case ESC: \ - RESERVE_INBUF(2) \ - if (IS_ISO2022ESC(IN2)) { \ - int err; \ - err = iso2022processesc(state, inbuf, &inleft); \ - if (err != 0) \ - return err; \ - } else SS2_ROUTINE { \ - STATE_SETFLAG(state, F_ESCTHROUGHOUT) \ - OUT1(ESC) \ - NEXT(1, 1) \ - } \ - break; \ - SHIFT_CASES \ - case '\n': \ - STATE_CLEARFLAG(state, F_SHIFTED) \ - WRITE1('\n') \ - NEXT(1, 1) \ - break; - -#define ISO2022_ESCTHROUGHOUT(c) \ - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { \ - /* ESC throughout mode: for non-iso2022 escape sequences */ \ - RESERVE_OUTBUF(1) \ - OUT1(c) /* assume as ISO-8859-1 */ \ - NEXT(1, 1) \ - if (IS_ESCEND(c)) { \ - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) \ - } \ - continue; \ - } - -#define ISO2022_LOOP_BEGIN \ - while (inleft > 0) { \ - unsigned char c = IN1; \ - ISO2022_ESCTHROUGHOUT(c) \ - switch(c) { \ - ISO2022_BASECASES(c) \ - default: \ - if (c < 0x20) { /* C0 */ \ - RESERVE_OUTBUF(1) \ - OUT1(c) \ - NEXT(1, 1) \ - } else if (c >= 0x80) \ - return 1; \ - else { -#define ISO2022_LOOP_END \ - } \ - } \ - } - -static int -iso2022processesc(MultibyteCodec_State *state, - const unsigned char **inbuf, size_t *inleft) -{ - unsigned char charset, designation; - size_t i, esclen; - - for (i = 1;i < MAX_ESCSEQLEN;i++) { - if (i >= *inleft) - return MBERR_TOOFEW; - if (IS_ESCEND((*inbuf)[i])) { - esclen = i + 1; - break; - } -#ifdef ISO2022_USE_JISX0208EXT - else if (i+1 < *inleft && (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') - i += 2; -#endif - } - - if (i >= MAX_ESCSEQLEN) - return 1; /* unterminated escape sequence */ - - switch (esclen) { - case 3: - if (IN2 == '$') { - charset = IN3 | CHARSET_DOUBLEBYTE; - designation = 0; - } else { - charset = IN3; - if (IN2 == '(') designation = 0; - else if (IN2 == ')') designation = 1; -#ifdef ISO2022_USE_G2_DESIGNATION - else if (IN2 == '.') designation = 2; -#endif - else return 3; - } - break; - case 4: - if (IN2 != '$') - return 4; - - charset = IN4 | CHARSET_DOUBLEBYTE; - if (IN3 == '(') designation = 0; - else if (IN3 == ')') designation = 1; - else return 4; - break; -#ifdef ISO2022_USE_JISX0208EXT - case 6: /* designation with prefix */ - if ((*inbuf)[3] == ESC && (*inbuf)[4] == '$' && (*inbuf)[5] == 'B') { - charset = 'B' | CHARSET_DOUBLEBYTE; - designation = 0; - } else - return 6; - break; -#endif - default: - return esclen; - } - - { /* raise error when the charset is not designated for this encoding */ - const unsigned char dsgs[] = {ISO2022_DESIGNATIONS, '\x00'}; - - for (i = 0; dsgs[i] != '\x00'; i++) - if (dsgs[i] == charset) - break; - - if (dsgs[i] == '\x00') - return esclen; - } - - STATE_SETG(designation, state, charset) - *inleft -= esclen; - (*inbuf) += esclen; - return 0; -} |