diff options
Diffstat (limited to 'Modules/cjkcodecs/_iso2022_jp_2.c')
-rw-r--r-- | Modules/cjkcodecs/_iso2022_jp_2.c | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/Modules/cjkcodecs/_iso2022_jp_2.c b/Modules/cjkcodecs/_iso2022_jp_2.c new file mode 100644 index 0000000..d16aa67 --- /dev/null +++ b/Modules/cjkcodecs/_iso2022_jp_2.c @@ -0,0 +1,230 @@ +/* + * _iso2022_jp_2.c: the ISO-2022-JP-2 codec (RFC1554) + * + * Written by Hye-Shik Chang <perky@FreeBSD.org> + * $CJKCodecs: _iso2022_jp_2.c,v 1.8 2003/12/31 05:46:55 perky Exp $ + */ + +#define ISO2022_DESIGNATIONS \ + CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \ + CHARSET_JISX0208_O, CHARSET_JISX0212, CHARSET_GB2312, \ + CHARSET_KSX1001, CHARSET_JISX0212, \ + CHARSET_ISO8859_1, CHARSET_ISO8859_7 +#define ISO2022_USE_G2_DESIGNATION yo! +#define ISO2022_USE_JISX0208EXT + +#include "codeccommon.h" +#include "iso2022common.h" +#include "alg_jisx0201.h" +#include "alg_iso8859_1.h" +#include "alg_iso8859_7.h" + +ENCMAP(jisxcommon) +DECMAP(jisx0208) +DECMAP(jisx0212) +ENCMAP(cp949) +DECMAP(ksx1001) +ENCMAP(gbcommon) +DECMAP(gb2312) + +#define HAVE_ENCODER_INIT +ENCODER_INIT(iso2022_jp_2) +{ + STATE_CLEARFLAGS(state) + STATE_SETG0(state, CHARSET_ASCII) + STATE_SETG1(state, CHARSET_ASCII) + STATE_SETG2(state, CHARSET_ASCII) + return 0; +} + +#define HAVE_ENCODER_RESET +ENCODER_RESET(iso2022_jp_2) +{ + if (STATE_GETG0(state) != CHARSET_ASCII) { + WRITE3(ESC, '(', 'B') + STATE_SETG0(state, CHARSET_ASCII) + NEXT_OUT(3) + } + return 0; +} + +ENCODER(iso2022_jp_2) +{ + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + + if (c < 0x80) { + switch (STATE_GETG0(state)) { + case CHARSET_ASCII: + WRITE1(c) + NEXT(1, 1) + break; + case CHARSET_JISX0201_R: + JISX0201_R_ENCODE(c, code) + else { /* FALLTHROUGH (yay!) */ + default: + WRITE3(ESC, '(', 'B') + NEXT_OUT(3) + STATE_SETG0(state, CHARSET_ASCII) + code = c; + } + WRITE1(code) + NEXT(1, 1) + break; + } + if (c == '\n') + STATE_CLEARFLAG(state, F_SHIFTED) + } + else UCS4INVALID(c) + else { + unsigned char charset; + + charset = STATE_GETG0(state); + if (charset == CHARSET_JISX0201_R) { + code = DBCINV; + JISX0201_R_ENCODE(c, code) + if (code != DBCINV) { + WRITE1(code) + NEXT(1, 1) + continue; + } + } + + TRYMAP_ENC(jisxcommon, code, c) { + if (code & 0x8000) { /* MSB set: JIS X 0212 */ + if (charset != CHARSET_JISX0212) { + WRITE4(ESC, '$', '(', 'D') + STATE_SETG0(state, CHARSET_JISX0212) + NEXT_OUT(4) + } + WRITE2((code >> 8) & 0x7f, code & 0x7f) + } else { /* MSB unset: JIS X 0208 */ +jisx0208encode: if (charset != CHARSET_JISX0208) { + WRITE3(ESC, '$', 'B') + STATE_SETG0(state, CHARSET_JISX0208) + NEXT_OUT(3) + } + WRITE2(code >> 8, code & 0xff) + } + NEXT(1, 2) + } else TRYMAP_ENC(cp949, code, c) { + if (code & 0x8000) /* MSB set: CP949 */ + return 2; + if (charset != CHARSET_KSX1001) { + WRITE4(ESC, '$', '(', 'C') + STATE_SETG0(state, CHARSET_KSX1001) + NEXT_OUT(4) + } + WRITE2(code >> 8, code & 0xff) + NEXT(1, 2) + } else TRYMAP_ENC(gbcommon, code, c) { + if (code & 0x8000) /* MSB set: GBK */ + return 2; + if (charset != CHARSET_GB2312) { + WRITE4(ESC, '$', '(', 'A') + STATE_SETG0(state, CHARSET_GB2312) + NEXT_OUT(4) + } + WRITE2(code >> 8, code & 0xff) + NEXT(1, 2) + } else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */ + code = 0x2140; + goto jisx0208encode; + } else { + JISX0201_R_ENCODE(c, code) + else { + /* There's no need to try to encode as ISO-8859-1 or + * ISO-8859-7 because JIS X 0212 includes them already. + */ + return 1; + } + /* if (charset == CHARSET_JISX0201_R) : already checked */ + WRITE4(ESC, '(', 'J', code) + STATE_SETG0(state, CHARSET_JISX0201_R) + NEXT(1, 4) + } + } + } + + return 0; +} + +#define HAVE_DECODER_INIT +DECODER_INIT(iso2022_jp_2) +{ + STATE_CLEARFLAGS(state) + STATE_SETG0(state, CHARSET_ASCII) + STATE_SETG1(state, CHARSET_ASCII) + STATE_SETG2(state, CHARSET_ASCII) + return 0; +} + +#define HAVE_DECODER_RESET +DECODER_RESET(iso2022_jp_2) +{ + STATE_CLEARFLAG(state, F_SHIFTED) + return 0; +} + +DECODER(iso2022_jp_2) +{ + ISO2022_LOOP_BEGIN + unsigned char charset, c2; + + ISO2022_GETCHARSET(charset, c) + + if (charset & CHARSET_DOUBLEBYTE) { + RESERVE_INBUF(2) + RESERVE_OUTBUF(1) + c2 = IN2; + if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) { + if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */ + **outbuf = 0xff3c; + else TRYMAP_DEC(jisx0208, **outbuf, c, c2); + else return 2; + } else if (charset == CHARSET_JISX0212) { + TRYMAP_DEC(jisx0212, **outbuf, c, c2); + else return 2; + } else if (charset == CHARSET_KSX1001) { + TRYMAP_DEC(ksx1001, **outbuf, c, c2); + else return 2; + } else if (charset == CHARSET_GB2312) { + TRYMAP_DEC(gb2312, **outbuf, c, c2); + else return 2; + } else + return MBERR_INTERNAL; + NEXT(2, 1) + } else if (charset == CHARSET_ASCII) { + RESERVE_OUTBUF(1) + OUT1(c) + NEXT(1, 1) + } else if (charset == CHARSET_JISX0201_R) { + RESERVE_OUTBUF(1) + JISX0201_R_DECODE(c, **outbuf) + else + return 1; + NEXT(1, 1) + } else + return MBERR_INTERNAL; + ISO2022_LOOP_END + + return 0; +} + +#include "codecentry.h" +BEGIN_CODEC_REGISTRY(iso2022_jp_2) + MAPOPEN(ja_JP) + IMPORTMAP_DEC(jisx0208) + IMPORTMAP_DEC(jisx0212) + IMPORTMAP_ENC(jisxcommon) + MAPCLOSE() + MAPOPEN(ko_KR) + IMPORTMAP_ENC(cp949) + IMPORTMAP_DEC(ksx1001) + MAPCLOSE() + MAPOPEN(zh_CN) + IMPORTMAP_ENC(gbcommon) + IMPORTMAP_DEC(gb2312) + MAPCLOSE() +END_CODEC_REGISTRY(iso2022_jp_2) |