diff options
author | Hye-Shik Chang <hyeshik@gmail.com> | 2004-01-17 14:29:29 (GMT) |
---|---|---|
committer | Hye-Shik Chang <hyeshik@gmail.com> | 2004-01-17 14:29:29 (GMT) |
commit | 3e2a30692085d32ac63f72b35da39158a471fc68 (patch) | |
tree | 4cbe735f61eae87ac56a13ca6bd32113b98bd03d /Modules/cjkcodecs/_hz.c | |
parent | cd1f7430cb8f48de970021071d7683054c23b10f (diff) | |
download | cpython-3e2a30692085d32ac63f72b35da39158a471fc68.zip cpython-3e2a30692085d32ac63f72b35da39158a471fc68.tar.gz cpython-3e2a30692085d32ac63f72b35da39158a471fc68.tar.bz2 |
Add CJK codecs support as discussed on python-dev. (SF #873597)
Several style fixes are suggested by Martin v. Loewis and
Marc-Andre Lemburg. Thanks!
Diffstat (limited to 'Modules/cjkcodecs/_hz.c')
-rw-r--r-- | Modules/cjkcodecs/_hz.c | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/Modules/cjkcodecs/_hz.c b/Modules/cjkcodecs/_hz.c
new file mode 100644
index 0000000..50805b1
--- /dev/null
+++ b/Modules/cjkcodecs/_hz.c
@@ -0,0 +1,134 @@
+/*
+ * _hz.c: the HZ codec (RFC1843)
+ *
+ * Written by Hye-Shik Chang <perky@FreeBSD.org>
+ * $CJKCodecs: _hz.c,v 1.2 2003/12/31 05:46:55 perky Exp $
+ */
+
+#include "codeccommon.h"
+/* Names the gbcommon and gb2312 maps that TRYMAP_ENC / TRYMAP_DEC use below (macros presumably from codeccommon.h). */
+ENCMAP(gbcommon)
+DECMAP(gb2312)
+/* Encoder init: start with state->i == 0, the state in which ENCODER() writes bytes below 0x80 unprefixed. */
+#define HAVE_ENCODER_INIT
+ENCODER_INIT(hz)
+{
+    state->i = 0;
+    return 0;
+}
+/* Encoder reset: if state->i is nonzero, write the two bytes ~} and clear state->i. */
+#define HAVE_ENCODER_RESET
+ENCODER_RESET(hz)
+{
+    if (state->i != 0) {
+        WRITE2('~', '}')
+        state->i = 0;
+        NEXT_OUT(2)
+    }
+    return 0;
+}
+/* Encoder: units below 0x80 are written as-is (after ~} when state->i is set); others are mapped through gbcommon. */
+ENCODER(hz)
+{
+    while (inleft > 0) {
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+/* NOTE(review): a literal ~ (0x7e) is copied through unchanged here, yet DECODER() reads ~ as an escape lead byte; RFC 1843 encodes ~ as ~~. */
+        if (c < 0x80) {
+            if (state->i == 0) {
+                WRITE1(c)
+                NEXT(1, 1)
+            } else {
+                WRITE3('~', '}', c)
+                NEXT(1, 3)
+                state->i = 0;
+            }
+            continue;
+        }
+
+        UCS4INVALID(c)
+/* Look c up in gbcommon; the else on the following line presumably pairs with an if inside TRYMAP_ENC (confirm). */
+        TRYMAP_ENC(gbcommon, code, c);
+        else return 1;
+
+        if (code & 0x8000) /* MSB set: GBK */
+            return 1;
+/* Write the two code bytes, prefixed by ~{ when state->i is still 0; state->i then becomes 1. */
+        if (state->i == 0) {
+            WRITE4('~', '{', code >> 8, code & 0xff)
+            NEXT(1, 4)
+            state->i = 1;
+        } else {
+            WRITE2(code >> 8, code & 0xff)
+            NEXT(1, 2)
+        }
+    }
+
+    return 0;
+}
+/* Decoder init: start with state->i == 0, which DECODER() treats as ASCII mode. */
+#define HAVE_DECODER_INIT
+DECODER_INIT(hz)
+{
+    state->i = 0;
+    return 0;
+}
+/* Decoder reset: same effect as init, state->i goes back to 0. */
+#define HAVE_DECODER_RESET
+DECODER_RESET(hz)
+{
+    state->i = 0;
+    return 0;
+}
+/* Decoder: ~ starts a two-byte escape (~~, ~{, ~}, ~ then newline); other bytes are ASCII or GB2312 pairs per state->i. */
+DECODER(hz)
+{
+    while (inleft > 0) {
+        unsigned char c = IN1;
+
+        if (c == '~') {
+            unsigned char c2 = IN2;
+/* NOTE(review): IN2 is read above before RESERVE_INBUF(2); if that macro is the input length check, this over-reads by one byte when ~ ends the input. Confirm in codeccommon.h. */
+            RESERVE_INBUF(2)
+            if (c2 == '~') {
+                WRITE1('~')
+                NEXT(2, 1)
+                continue;
+            } else if (c2 == '{' && state->i == 0)
+                state->i = 1; /* set GB */
+            else if (c2 == '}' && state->i == 1)
+                state->i = 0; /* set ASCII */
+            else if (c2 == '\n')
+                ; /* line-continuation */
+            else
+                return 2;
+            NEXT(2, 0);
+            continue;
+        }
+/* Bytes with the high bit set are rejected (return 1) in both modes. */
+        if (c & 0x80)
+            return 1;
+
+        if (state->i == 0) { /* ASCII mode */
+            WRITE1(c)
+            NEXT(1, 1)
+        } else { /* GB mode */
+            RESERVE_INBUF(2)
+            RESERVE_OUTBUF(1)
+            TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
+                NEXT(2, 1)
+            } else
+                return 2;
+        }
+    }
+
+    return 0;
+}
+/* Registry section: names the zh_CN map source and the gb2312 / gbcommon maps (macro semantics presumably in codecentry.h). */
+#include "codecentry.h"
+BEGIN_CODEC_REGISTRY(hz)
+    MAPOPEN(zh_CN)
+    IMPORTMAP_DEC(gb2312)
+    IMPORTMAP_ENC(gbcommon)
+    MAPCLOSE()
+END_CODEC_REGISTRY(hz) |