diff options
author | Hye-Shik Chang <hyeshik@gmail.com> | 2004-01-17 14:29:29 (GMT) |
---|---|---|
committer | Hye-Shik Chang <hyeshik@gmail.com> | 2004-01-17 14:29:29 (GMT) |
commit | 3e2a30692085d32ac63f72b35da39158a471fc68 (patch) | |
tree | 4cbe735f61eae87ac56a13ca6bd32113b98bd03d /Modules/cjkcodecs/_shift_jis.c | |
parent | cd1f7430cb8f48de970021071d7683054c23b10f (diff) | |
download | cpython-3e2a30692085d32ac63f72b35da39158a471fc68.zip cpython-3e2a30692085d32ac63f72b35da39158a471fc68.tar.gz cpython-3e2a30692085d32ac63f72b35da39158a471fc68.tar.bz2 |
Add CJK codecs support as discussed on python-dev. (SF #873597)
Several style fixes are suggested by Martin v. Loewis and
Marc-Andre Lemburg. Thanks!
Diffstat (limited to 'Modules/cjkcodecs/_shift_jis.c')
-rw-r--r-- | Modules/cjkcodecs/_shift_jis.c | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/Modules/cjkcodecs/_shift_jis.c b/Modules/cjkcodecs/_shift_jis.c
new file mode 100644
index 0000000..0bdee3f
--- /dev/null
+++ b/Modules/cjkcodecs/_shift_jis.c
@@ -0,0 +1,121 @@
+/*
+ * _shift_jis.c: the SHIFT-JIS codec
+ *
+ * Written by Hye-Shik Chang <perky@FreeBSD.org>
+ * $CJKCodecs: _shift_jis.c,v 1.4 2003/12/31 05:46:55 perky Exp $
+ */
+
+#include "codeccommon.h"
+#include "alg_jisx0201.h"
+
+ENCMAP(jisxcommon)
+DECMAP(jisx0208)
+/*
+ * Shift-JIS encoder.
+ *
+ * Each loop iteration reads one input character (IN1) and emits either one
+ * byte or a two-byte sequence for it.
+ *
+ * Returns 0 once inleft reaches zero.  Returns 1 when the character has no
+ * entry in the jisxcommon map, or when the mapped code has bit 0x8000 set.
+ *
+ * NOTE(review): RESERVE_OUTBUF, UCS4INVALID, NEXT, OUT1/OUT2 and the
+ * JISX0201_* macros are defined outside this file and may contain further
+ * early returns -- confirm in the included headers.  NEXT(i, o) presumably
+ * advances the input by i and the output by o.
+ */
+ENCODER(shift_jis)
+{
+    while (inleft > 0) {
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        unsigned char c1, c2;
+        /*
+         * Single-byte candidates are tried first.  Every macro in the chain
+         * below is followed by "else", so each one has to expand to an if
+         * statement for the chain to compile.
+         */
+#ifdef STRICT_BUILD
+        JISX0201_R_ENCODE(c, code)
+#else
+        if (c < 0x80) code = c; /* below 0x80 the byte equals the character */
+        else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
+        else if (c == 0x203e) code = 0x7e; /* OVERLINE */
+#endif
+        else JISX0201_K_ENCODE(c, code) /* presumably half-width katakana -- TODO confirm */
+        else UCS4INVALID(c) /* NOTE(review): effect not visible here -- confirm */
+        else code = NOCHAR; /* undecided; resolved through jisxcommon below */
+        /* One output byte suffices for codes below 0x80 or in 0xa1..0xdf. */
+        if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
+            RESERVE_OUTBUF(1)
+
+            OUT1(code)
+            NEXT(1, 1)
+            continue;
+        }
+        /* Every remaining character takes two output bytes. */
+        RESERVE_OUTBUF(2)
+
+        if (code == NOCHAR) {
+            TRYMAP_ENC(jisxcommon, code, c); /* the ';' is the empty body of the if this macro must expand to, since an else follows */
+#ifndef STRICT_BUILD
+            else if (c == 0xff3c)
+                code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
+#endif
+            else
+                return 1; /* no mapping found for c */
+
+            if (code & 0x8000) /* MSB set: JIS X 0212 */
+                return 1;
+        }
+        /*
+         * Fold the 16-bit code into a lead/trail byte pair.
+         *   - c1 is the high byte and c2 the low byte, both rebased from 0x21.
+         *   - Two consecutive c1 values share one lead byte: when
+         *     (c1 - 0x21) is odd, c2 is moved up by 0x5e.
+         *   - Lead byte: halved c1 below 0x1f gives 0x81..0x9f, anything
+         *     higher starts at 0xe0.
+         *   - Trail byte: c2 below 0x3f gives 0x40..0x7e, anything higher
+         *     starts at 0x80, so 0x7f is never produced.
+         */
+        c1 = code >> 8;
+        c2 = code & 0xff;
+        c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
+        c1 = (c1 - 0x21) >> 1;
+        OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
+        OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+        NEXT(1, 2)
+    }
+
+    return 0;
+}
+/*
+ * Shift-JIS decoder.
+ *
+ * Each loop iteration reads one input byte (IN1), optionally a second one
+ * (IN2), and stores a single result through **outbuf.
+ *
+ * Returns 0 once inleft reaches zero.  Returns 2 when the trail byte is out
+ * of range, when the byte pair has no entry in the jisx0208 map, or when the
+ * first byte matches none of the recognised ranges.
+ *
+ * NOTE(review): RESERVE_OUTBUF, RESERVE_INBUF, NEXT, OUT1, TRYMAP_DEC and
+ * the JISX0201_* macros are defined outside this file and may contain
+ * further early returns -- confirm in the included headers.
+ */
+DECODER(shift_jis)
+{
+    while (inleft > 0) {
+        unsigned char c = IN1;
+
+        RESERVE_OUTBUF(1) /* every path below stores exactly one result */
+        /* As in the encoder, the macros in this else chain must expand to if statements. */
+#ifdef STRICT_BUILD
+        JISX0201_R_DECODE(c, **outbuf)
+#else
+        if (c < 0x80) **outbuf = c; /* below 0x80 the character equals the byte */
+#endif
+        else JISX0201_K_DECODE(c, **outbuf) /* presumably half-width katakana -- TODO confirm */
+        else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) {
+            unsigned char c1, c2;
+
+            RESERVE_INBUF(2) /* presumably bails out when fewer than 2 input bytes remain -- confirm */
+            c2 = IN2;
+            if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) /* accepted trail bytes: 0x40..0x7e and 0x80..0xfc */
+                return 2;
+            /*
+             * Undo the byte folding done by the encoder: rebase the lead
+             * byte (0x81.. or 0xe0..) and the trail byte (skipping 0x7f).
+             * A rebased trail value of 0x5e or more selects the second of
+             * the two c1 values that share this lead byte.  Both results
+             * are 0x21-based.
+             */
+            c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
+            c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
+            c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
+            c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
+
+#ifndef STRICT_BUILD
+            if (c1 == 0x21 && c2 == 0x40) {
+                /* FULL-WIDTH REVERSE SOLIDUS */
+                OUT1(0xff3c)
+                NEXT(2, 1)
+                continue;
+            }
+#endif
+            TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
+                NEXT(2, 1)
+                continue;
+            } else
+                return 2; /* byte pair has no jisx0208 mapping */
+        } else
+            return 2; /* NOTE(review): 2 is returned although only one byte was examined on this path -- confirm how the caller uses this value */
+        /* Only the single-byte branches fall through to this point. */
+        NEXT(1, 1) /* JIS X 0201 */
+    }
+
+    return 0;
+}
+/*
+ * Codec registration for shift_jis.  The maps imported here carry the same
+ * names as the DECMAP(jisx0208) and ENCMAP(jisxcommon) declarations near the
+ * top of the file.
+ *
+ * NOTE(review): the registry macros are presumably supplied by
+ * codecentry.h -- confirm there what MAPOPEN(ja_JP) actually loads.
+ */
+#include "codecentry.h"
+BEGIN_CODEC_REGISTRY(shift_jis)
+    MAPOPEN(ja_JP)
+    IMPORTMAP_DEC(jisx0208)
+    IMPORTMAP_ENC(jisxcommon)
+    MAPCLOSE()
+END_CODEC_REGISTRY(shift_jis) |