summary refs log tree commit diff stats
path: root/Modules/cjkcodecs/_iso2022_jp_2.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/cjkcodecs/_iso2022_jp_2.c')
-rw-r--r-- Modules/cjkcodecs/_iso2022_jp_2.c 230
1 files changed, 230 insertions, 0 deletions
diff --git a/Modules/cjkcodecs/_iso2022_jp_2.c b/Modules/cjkcodecs/_iso2022_jp_2.c
new file mode 100644
index 0000000..d16aa67
--- /dev/null
+++ b/Modules/cjkcodecs/_iso2022_jp_2.c
@@ -0,0 +1,230 @@
+/*
+ * _iso2022_jp_2.c: the ISO-2022-JP-2 codec (RFC1554)
+ *
+ * Written by Hye-Shik Chang <perky@FreeBSD.org>
+ * $CJKCodecs: _iso2022_jp_2.c,v 1.8 2003/12/31 05:46:55 perky Exp $
+ */
+
+#define ISO2022_DESIGNATIONS /* CHARSET_* ids for this codec; defined before iso2022common.h is included */ \
+ CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \
+ CHARSET_JISX0208_O, CHARSET_JISX0212, CHARSET_GB2312, \
+ CHARSET_KSX1001, CHARSET_JISX0212, \
+ CHARSET_ISO8859_1, CHARSET_ISO8859_7 /* NOTE(review): CHARSET_JISX0212 is listed twice above -- confirm whether that is intentional */
+#define ISO2022_USE_G2_DESIGNATION yo! /* the value looks like a placeholder; presumably only its definedness is tested in iso2022common.h -- TODO confirm */
+#define ISO2022_USE_JISX0208EXT /* feature switch for iso2022common.h; its effect is not visible in this file */
+
+#include "codeccommon.h"
+#include "iso2022common.h"
+#include "alg_jisx0201.h"
+#include "alg_iso8859_1.h"
+#include "alg_iso8859_7.h"
+
+ENCMAP(jisxcommon) /* encode map used by the encoder: codes with bit 0x8000 set are JIS X 0212, the rest JIS X 0208 */
+DECMAP(jisx0208) /* decode map used for CHARSET_JISX0208 and CHARSET_JISX0208_O */
+DECMAP(jisx0212) /* decode map used for CHARSET_JISX0212 */
+ENCMAP(cp949) /* encode map; the encoder only writes codes without bit 0x8000, as KS X 1001 */
+DECMAP(ksx1001) /* decode map used for CHARSET_KSX1001 */
+ENCMAP(gbcommon) /* encode map; the encoder only writes codes without bit 0x8000, as GB 2312 */
+DECMAP(gb2312) /* decode map used for CHARSET_GB2312 */
+
+#define HAVE_ENCODER_INIT /* marks that this file supplies ENCODER_INIT (presumably checked by codecentry.h -- TODO confirm) */
+ENCODER_INIT(iso2022_jp_2) /* Put the encoder state into its initial configuration; always returns 0. */
+{
+ STATE_CLEARFLAGS(state) /* drop every state flag */
+ STATE_SETG0(state, CHARSET_ASCII) /* G0, G1 and G2 all start out designated to ASCII */
+ STATE_SETG1(state, CHARSET_ASCII)
+ STATE_SETG2(state, CHARSET_ASCII)
+ return 0;
+}
+
+#define HAVE_ENCODER_RESET /* marks that this file supplies ENCODER_RESET (presumably checked by codecentry.h -- TODO confirm) */
+ENCODER_RESET(iso2022_jp_2) /* Return the output to ASCII: if G0 is not ASCII, write the ASCII designation and update the state; returns 0. */
+{
+ if (STATE_GETG0(state) != CHARSET_ASCII) {
+ WRITE3(ESC, '(', 'B') /* same three bytes the encoder writes when it switches G0 back to ASCII */
+ STATE_SETG0(state, CHARSET_ASCII)
+ NEXT_OUT(3) /* account for the three bytes just written */
+ }
+ return 0;
+}
+
+ENCODER(iso2022_jp_2) /* Encode Unicode input, switching the G0 charset by escape sequence as needed; returns 0 at end of input, 1 for an unencodable character (helper macros may also return). */
+{
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+
+ if (c < 0x80) { /* 7-bit input: written as one byte under ASCII or JIS X 0201 Roman */
+ switch (STATE_GETG0(state)) {
+ case CHARSET_ASCII:
+ WRITE1(c)
+ NEXT(1, 1)
+ break;
+ case CHARSET_JISX0201_R:
+ JISX0201_R_ENCODE(c, code)
+ else { /* FALLTHROUGH (yay!) */
+ default: /* shared by a JIS X 0201 miss and by every other G0 charset: go back to ASCII first */
+ WRITE3(ESC, '(', 'B')
+ NEXT_OUT(3)
+ STATE_SETG0(state, CHARSET_ASCII)
+ code = c;
+ }
+ WRITE1(code)
+ NEXT(1, 1)
+ break;
+ }
+ if (c == '\n')
+ STATE_CLEARFLAG(state, F_SHIFTED)
+ }
+ else UCS4INVALID(c)
+ else {
+ unsigned char charset;
+
+ charset = STATE_GETG0(state);
+ if (charset == CHARSET_JISX0201_R) { /* already in JIS X 0201 Roman: try it first so no escape is needed */
+ code = DBCINV;
+ JISX0201_R_ENCODE(c, code)
+ if (code != DBCINV) {
+ WRITE1(code)
+ NEXT(1, 1)
+ continue;
+ }
+ }
+
+ TRYMAP_ENC(jisxcommon, code, c) { /* charsets are tried in order: JIS X 0212/0208, KS X 1001, GB 2312 */
+ if (code & 0x8000) { /* MSB set: JIS X 0212 */
+ if (charset != CHARSET_JISX0212) {
+ WRITE4(ESC, '$', '(', 'D')
+ STATE_SETG0(state, CHARSET_JISX0212)
+ NEXT_OUT(4)
+ }
+ WRITE2((code >> 8) & 0x7f, code & 0x7f)
+ } else { /* MSB unset: JIS X 0208 */
+jisx0208encode: if (charset != CHARSET_JISX0208) {
+ WRITE3(ESC, '$', 'B')
+ STATE_SETG0(state, CHARSET_JISX0208)
+ NEXT_OUT(3)
+ }
+ WRITE2(code >> 8, code & 0xff)
+ }
+ NEXT(1, 2)
+ } else TRYMAP_ENC(cp949, code, c) {
+ if (code & 0x8000) /* MSB set: CP949 */
+ return 1; /* exactly one input character is unencodable, matching the other failure path below */
+ if (charset != CHARSET_KSX1001) {
+ WRITE4(ESC, '$', '(', 'C')
+ STATE_SETG0(state, CHARSET_KSX1001)
+ NEXT_OUT(4)
+ }
+ WRITE2(code >> 8, code & 0xff)
+ NEXT(1, 2)
+ } else TRYMAP_ENC(gbcommon, code, c) {
+ if (code & 0x8000) /* MSB set: GBK */
+ return 1; /* exactly one input character is unencodable, matching the other failure path below */
+ if (charset != CHARSET_GB2312) {
+ WRITE4(ESC, '$', '(', 'A')
+ STATE_SETG0(state, CHARSET_GB2312)
+ NEXT_OUT(4)
+ }
+ WRITE2(code >> 8, code & 0xff)
+ NEXT(1, 2)
+ } else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
+ code = 0x2140; /* hard-wired JIS X 0208 code; the decoder maps 0x21 0x40 back to U+FF3C */
+ goto jisx0208encode;
+ } else {
+ JISX0201_R_ENCODE(c, code)
+ else {
+ /* There's no need to try to encode as ISO-8859-1 or
+ * ISO-8859-7 because JIS X 0212 includes them already.
+ */
+ return 1;
+ }
+ /* if (charset == CHARSET_JISX0201_R) : already checked */
+ WRITE4(ESC, '(', 'J', code) /* designation (3 bytes) and the encoded byte written together */
+ STATE_SETG0(state, CHARSET_JISX0201_R)
+ NEXT(1, 4)
+ }
+ }
+ }
+
+ return 0;
+}
+
+#define HAVE_DECODER_INIT /* marks that this file supplies DECODER_INIT (presumably checked by codecentry.h -- TODO confirm) */
+DECODER_INIT(iso2022_jp_2) /* Put the decoder state into its initial configuration; always returns 0. */
+{
+ STATE_CLEARFLAGS(state) /* drop every state flag */
+ STATE_SETG0(state, CHARSET_ASCII) /* G0, G1 and G2 all start out designated to ASCII */
+ STATE_SETG1(state, CHARSET_ASCII)
+ STATE_SETG2(state, CHARSET_ASCII)
+ return 0;
+}
+
+#define HAVE_DECODER_RESET /* marks that this file supplies DECODER_RESET (presumably checked by codecentry.h -- TODO confirm) */
+DECODER_RESET(iso2022_jp_2) /* Reset hook for the decoder: clears only F_SHIFTED and leaves the G0/G1/G2 designations untouched; returns 0. */
+{
+ STATE_CLEARFLAG(state, F_SHIFTED)
+ return 0;
+}
+
+DECODER(iso2022_jp_2) /* Decode input bytes to Unicode according to the charset reported by ISO2022_GETCHARSET; returns 0 after the loop, a positive count for undecodable bytes, or MBERR_INTERNAL. */
+{
+ ISO2022_LOOP_BEGIN /* loop scaffolding from iso2022common.h; presumably also consumes escape sequences -- TODO confirm */
+ unsigned char charset, c2;
+
+ ISO2022_GETCHARSET(charset, c)
+
+ if (charset & CHARSET_DOUBLEBYTE) { /* two input bytes produce one output character */
+ RESERVE_INBUF(2)
+ RESERVE_OUTBUF(1)
+ c2 = IN2;
+ if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) { /* both JIS X 0208 variants share one decode map */
+ if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
+ **outbuf = 0xff3c;
+ else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
+ else return 2; /* this byte pair has no mapping */
+ } else if (charset == CHARSET_JISX0212) {
+ TRYMAP_DEC(jisx0212, **outbuf, c, c2);
+ else return 2;
+ } else if (charset == CHARSET_KSX1001) {
+ TRYMAP_DEC(ksx1001, **outbuf, c, c2);
+ else return 2;
+ } else if (charset == CHARSET_GB2312) {
+ TRYMAP_DEC(gb2312, **outbuf, c, c2);
+ else return 2;
+ } else
+ return MBERR_INTERNAL; /* a double-byte charset with no branch above */
+ NEXT(2, 1)
+ } else if (charset == CHARSET_ASCII) {
+ RESERVE_OUTBUF(1)
+ OUT1(c)
+ NEXT(1, 1)
+ } else if (charset == CHARSET_JISX0201_R) {
+ RESERVE_OUTBUF(1)
+ JISX0201_R_DECODE(c, **outbuf)
+ else
+ return 1; /* JISX0201_R_DECODE did not accept this byte */
+ NEXT(1, 1)
+ } else
+ return MBERR_INTERNAL; /* a single-byte charset other than ASCII or JIS X 0201 Roman; NOTE(review): ISO-8859-1/-7 are presumably handled inside iso2022common.h -- confirm */
+ ISO2022_LOOP_END
+
+ return 0;
+}
+
+#include "codecentry.h"
+BEGIN_CODEC_REGISTRY(iso2022_jp_2) /* Registration section: imports each map declared with ENCMAP/DECMAP at the top of the file. */
+ MAPOPEN(ja_JP) /* maps imported from the ja_JP group */
+ IMPORTMAP_DEC(jisx0208)
+ IMPORTMAP_DEC(jisx0212)
+ IMPORTMAP_ENC(jisxcommon)
+ MAPCLOSE()
+ MAPOPEN(ko_KR) /* maps imported from the ko_KR group */
+ IMPORTMAP_ENC(cp949)
+ IMPORTMAP_DEC(ksx1001)
+ MAPCLOSE()
+ MAPOPEN(zh_CN) /* maps imported from the zh_CN group */
+ IMPORTMAP_ENC(gbcommon)
+ IMPORTMAP_DEC(gb2312)
+ MAPCLOSE()
+END_CODEC_REGISTRY(iso2022_jp_2)