diff options
Diffstat (limited to 'Modules/cjkcodecs/_codecs_iso2022.c')
-rw-r--r-- | Modules/cjkcodecs/_codecs_iso2022.c | 475 |
1 files changed, 247 insertions, 228 deletions
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index cbc1542..5c401aa 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -73,7 +73,7 @@ #define F_SHIFTED 0x01 #define F_ESCTHROUGHOUT 0x02 -#define STATE_SETG(dn, v) ((state)->c[dn]) = (v); +#define STATE_SETG(dn, v) do { ((state)->c[dn]) = (v); } while (0) #define STATE_GETG(dn) ((state)->c[dn]) #define STATE_G0 STATE_GETG(0) @@ -85,10 +85,10 @@ #define STATE_SETG2(v) STATE_SETG(2, v) #define STATE_SETG3(v) STATE_SETG(3, v) -#define STATE_SETFLAG(f) ((state)->c[4]) |= (f); +#define STATE_SETFLAG(f) do { ((state)->c[4]) |= (f); } while (0) #define STATE_GETFLAG(f) ((state)->c[4] & (f)) -#define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f); -#define STATE_CLEARFLAGS() ((state)->c[4]) = 0; +#define STATE_CLEARFLAG(f) do { ((state)->c[4]) &= ~(f); } while (0) +#define STATE_CLEARFLAGS() do { ((state)->c[4]) = 0; } while (0) #define ISO2022_CONFIG ((const struct iso2022_config *)config) #define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag)) @@ -102,8 +102,8 @@ /*-*- internal data structures -*-*/ typedef int (*iso2022_init_func)(void); -typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data); -typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length); +typedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data); +typedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length); struct iso2022_designation { unsigned char mark; @@ -132,53 +132,52 @@ CODEC_INIT(iso2022) ENCODER_INIT(iso2022) { - STATE_CLEARFLAGS() - STATE_SETG0(CHARSET_ASCII) - STATE_SETG1(CHARSET_ASCII) + STATE_CLEARFLAGS(); + STATE_SETG0(CHARSET_ASCII); + STATE_SETG1(CHARSET_ASCII); return 0; } ENCODER_RESET(iso2022) { if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) - NEXT_OUT(1) - STATE_CLEARFLAG(F_SHIFTED) + WRITEBYTE1(SI); + NEXT_OUT(1); + STATE_CLEARFLAG(F_SHIFTED); } if (STATE_G0 != CHARSET_ASCII) { - WRITE3(ESC, '(', 'B') - NEXT_OUT(3) - STATE_SETG0(CHARSET_ASCII) + WRITEBYTE3(ESC, '(', 'B'); + NEXT_OUT(3); + STATE_SETG0(CHARSET_ASCII); } return 0; } ENCODER(iso2022) { - while (inleft > 0) { + while (*inpos < inlen) { const struct iso2022_designation *dsg; DBCHAR encoded; - ucs4_t c = **inbuf; + Py_UCS4 c = INCHAR1; Py_ssize_t insize; if (c < 0x80) { if (STATE_G0 != CHARSET_ASCII) { - WRITE3(ESC, '(', 'B') - STATE_SETG0(CHARSET_ASCII) - NEXT_OUT(3) + WRITEBYTE3(ESC, '(', 'B'); + STATE_SETG0(CHARSET_ASCII); + NEXT_OUT(3); } if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) - STATE_CLEARFLAG(F_SHIFTED) - NEXT_OUT(1) + WRITEBYTE1(SI); + STATE_CLEARFLAG(F_SHIFTED); + NEXT_OUT(1); } - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c); + NEXT(1, 1); continue; } - DECODE_SURROGATE(c) - insize = GET_INSIZE(c); + insize = 1; encoded = MAP_UNMAPPABLE; for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { @@ -187,24 +186,14 @@ ENCODER(iso2022) if (encoded == MAP_MULTIPLE_AVAIL) { /* this implementation won't work for pair * of non-bmp characters. */ - if (inleft < 2) { + if (inlen - *inpos < 2) { if (!(flags & MBENC_FLUSH)) return MBERR_TOOFEW; length = -1; } else length = 2; -#if Py_UNICODE_SIZE == 2 - if (length == 2) { - ucs4_t u4in[2]; - u4in[0] = (ucs4_t)IN1; - u4in[1] = (ucs4_t)IN2; - encoded = dsg->encoder(u4in, &length); - } else - encoded = dsg->encoder(&c, &length); -#else encoded = dsg->encoder(&c, &length); -#endif if (encoded != MAP_UNMAPPABLE) { insize = length; break; @@ -221,47 +210,46 @@ ENCODER(iso2022) switch (dsg->plane) { case 0: /* G0 */ if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) - STATE_CLEARFLAG(F_SHIFTED) - NEXT_OUT(1) + WRITEBYTE1(SI); + STATE_CLEARFLAG(F_SHIFTED); + NEXT_OUT(1); } if (STATE_G0 != dsg->mark) { if (dsg->width == 1) { - WRITE3(ESC, '(', ESCMARK(dsg->mark)) - STATE_SETG0(dsg->mark) - NEXT_OUT(3) + WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark)); + STATE_SETG0(dsg->mark); + NEXT_OUT(3); } else if (dsg->mark == CHARSET_JISX0208) { - WRITE3(ESC, '$', ESCMARK(dsg->mark)) - STATE_SETG0(dsg->mark) - NEXT_OUT(3) + WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark)); + STATE_SETG0(dsg->mark); + NEXT_OUT(3); } else { - WRITE4(ESC, '$', '(', - ESCMARK(dsg->mark)) - STATE_SETG0(dsg->mark) - NEXT_OUT(4) + WRITEBYTE4(ESC, '$', '(', + ESCMARK(dsg->mark)); + STATE_SETG0(dsg->mark); + NEXT_OUT(4); } } break; case 1: /* G1 */ if (STATE_G1 != dsg->mark) { if (dsg->width == 1) { - WRITE3(ESC, ')', ESCMARK(dsg->mark)) - STATE_SETG1(dsg->mark) - NEXT_OUT(3) + WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark)); + STATE_SETG1(dsg->mark); + NEXT_OUT(3); } else { - WRITE4(ESC, '$', ')', - ESCMARK(dsg->mark)) - STATE_SETG1(dsg->mark) - NEXT_OUT(4) + WRITEBYTE4(ESC, '$', ')', ESCMARK(dsg->mark)); + STATE_SETG1(dsg->mark); + NEXT_OUT(4); } } if (!STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SO) - STATE_SETFLAG(F_SHIFTED) - NEXT_OUT(1) + WRITEBYTE1(SO); + STATE_SETFLAG(F_SHIFTED); + NEXT_OUT(1); } break; default: /* G2 and G3 is not supported: no encoding in @@ -270,14 +258,14 @@ ENCODER(iso2022) } if (dsg->width == 1) { - WRITE1((unsigned char)encoded) - NEXT_OUT(1) + WRITEBYTE1((unsigned char)encoded); + NEXT_OUT(1); } else { - WRITE2(encoded >> 8, encoded & 0xff) - NEXT_OUT(2) + WRITEBYTE2(encoded >> 8, encoded & 0xff); + NEXT_OUT(2); } - NEXT_IN(insize) + NEXT_INCHAR(insize); } return 0; @@ -285,17 +273,17 @@ ENCODER(iso2022) DECODER_INIT(iso2022) { - STATE_CLEARFLAGS() - STATE_SETG0(CHARSET_ASCII) - STATE_SETG1(CHARSET_ASCII) - STATE_SETG2(CHARSET_ASCII) + STATE_CLEARFLAGS(); + STATE_SETG0(CHARSET_ASCII); + STATE_SETG1(CHARSET_ASCII); + STATE_SETG2(CHARSET_ASCII); return 0; } DECODER_RESET(iso2022) { - STATE_SETG0(CHARSET_ASCII) - STATE_CLEARFLAG(F_SHIFTED) + STATE_SETG0(CHARSET_ASCII); + STATE_CLEARFLAG(F_SHIFTED); return 0; } @@ -314,8 +302,9 @@ iso2022processesc(const void *config, MultibyteCodec_State *state, break; } else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft && - (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') + (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') { i += 2; + } } if (i >= MAX_ESCSEQLEN) @@ -323,27 +312,33 @@ iso2022processesc(const void *config, MultibyteCodec_State *state, switch (esclen) { case 3: - if (IN2 == '$') { - charset = IN3 | CHARSET_DBCS; + if (INBYTE2 == '$') { + charset = INBYTE3 | CHARSET_DBCS; designation = 0; } else { - charset = IN3; - if (IN2 == '(') designation = 0; - else if (IN2 == ')') designation = 1; - else if (CONFIG_ISSET(USE_G2) && IN2 == '.') + charset = INBYTE3; + if (INBYTE2 == '(') + designation = 0; + else if (INBYTE2 == ')') + designation = 1; + else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.') designation = 2; - else return 3; + else + return 3; } break; case 4: - if (IN2 != '$') + if (INBYTE2 != '$') return 4; - charset = IN4 | CHARSET_DBCS; - if (IN3 == '(') designation = 0; - else if (IN3 == ')') designation = 1; - else return 4; + charset = INBYTE4 | CHARSET_DBCS; + if (INBYTE3 == '(') + designation = 0; + else if (INBYTE3 == ')') + designation = 1; + else + return 4; break; case 6: /* designation with prefix */ if (CONFIG_ISSET(USE_JISX0208_EXT) && @@ -363,58 +358,65 @@ iso2022processesc(const void *config, MultibyteCodec_State *state, if (charset != CHARSET_ASCII) { const struct iso2022_designation *dsg; - for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) + for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { if (dsg->mark == charset) break; + } if (!dsg->mark) return esclen; } - STATE_SETG(designation, charset) + STATE_SETG(designation, charset); *inleft -= esclen; (*inbuf) += esclen; return 0; } -#define ISO8859_7_DECODE(c, assi) \ - if ((c) < 0xa0) (assi) = (c); \ - else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \ - (assi) = (c); \ - else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \ - (0xbffffd77L & (1L << ((c)-0xb4))))) \ - (assi) = 0x02d0 + (c); \ - else if ((c) == 0xa1) (assi) = 0x2018; \ - else if ((c) == 0xa2) (assi) = 0x2019; \ - else if ((c) == 0xaf) (assi) = 0x2015; +#define ISO8859_7_DECODE(c, writer) \ + if ((c) < 0xa0) { \ + OUTCHAR(c); \ + } else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) { \ + OUTCHAR(c); \ + } else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \ + (0xbffffd77L & (1L << ((c)-0xb4))))) { \ + OUTCHAR(0x02d0 + (c)); \ + } else if ((c) == 0xa1) { \ + OUTCHAR(0x2018); \ + } else if ((c) == 0xa2) { \ + OUTCHAR(0x2019); \ + } else if ((c) == 0xaf) { \ + OUTCHAR(0x2015); \ + } static Py_ssize_t iso2022processg2(const void *config, MultibyteCodec_State *state, const unsigned char **inbuf, Py_ssize_t *inleft, - Py_UNICODE **outbuf, Py_ssize_t *outleft) + _PyUnicodeWriter *writer) { /* not written to use encoder, decoder functions because only few * encodings use G2 designations in CJKCodecs */ if (STATE_G2 == CHARSET_ISO8859_1) { - if (IN3 < 0x80) - OUT1(IN3 + 0x80) + if (INBYTE3 < 0x80) + OUTCHAR(INBYTE3 + 0x80); else return 3; } else if (STATE_G2 == CHARSET_ISO8859_7) { - ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf) - else return 3; + ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer) + else + return 3; } else if (STATE_G2 == CHARSET_ASCII) { - if (IN3 & 0x80) return 3; - else **outbuf = IN3; + if (INBYTE3 & 0x80) + return 3; + else + OUTCHAR(INBYTE3); } else return MBERR_INTERNAL; (*inbuf) += 3; *inleft -= 3; - (*outbuf) += 1; - *outleft -= 1; return 0; } @@ -423,58 +425,58 @@ DECODER(iso2022) const struct iso2022_designation *dsgcache = NULL; while (inleft > 0) { - unsigned char c = IN1; + unsigned char c = INBYTE1; Py_ssize_t err; if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { /* ESC throughout mode: * for non-iso2022 escape sequences */ - WRITE1(c) /* assume as ISO-8859-1 */ - NEXT(1, 1) + OUTCHAR(c); /* assume as ISO-8859-1 */ + NEXT_IN(1); if (IS_ESCEND(c)) { - STATE_CLEARFLAG(F_ESCTHROUGHOUT) + STATE_CLEARFLAG(F_ESCTHROUGHOUT); } continue; } switch (c) { case ESC: - REQUIRE_INBUF(2) - if (IS_ISO2022ESC(IN2)) { + REQUIRE_INBUF(2); + if (IS_ISO2022ESC(INBYTE2)) { err = iso2022processesc(config, state, inbuf, &inleft); if (err != 0) return err; } - else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */ - REQUIRE_INBUF(3) + else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */ + REQUIRE_INBUF(3); err = iso2022processg2(config, state, - inbuf, &inleft, outbuf, &outleft); + inbuf, &inleft, writer); if (err != 0) return err; } else { - WRITE1(ESC) - STATE_SETFLAG(F_ESCTHROUGHOUT) - NEXT(1, 1) + OUTCHAR(ESC); + STATE_SETFLAG(F_ESCTHROUGHOUT); + NEXT_IN(1); } break; case SI: if (CONFIG_ISSET(NO_SHIFT)) goto bypass; - STATE_CLEARFLAG(F_SHIFTED) - NEXT_IN(1) + STATE_CLEARFLAG(F_SHIFTED); + NEXT_IN(1); break; case SO: if (CONFIG_ISSET(NO_SHIFT)) goto bypass; - STATE_SETFLAG(F_SHIFTED) - NEXT_IN(1) + STATE_SETFLAG(F_SHIFTED); + NEXT_IN(1); break; case LF: - STATE_CLEARFLAG(F_SHIFTED) - WRITE1(LF) - NEXT(1, 1) + STATE_CLEARFLAG(F_SHIFTED); + OUTCHAR(LF); + NEXT_IN(1); break; default: if (c < 0x20) /* C0 */ @@ -484,7 +486,7 @@ DECODER(iso2022) else { const struct iso2022_designation *dsg; unsigned char charset; - ucs4_t decoded; + Py_UCS4 decoded; if (STATE_GETFLAG(F_SHIFTED)) charset = STATE_G1; @@ -492,43 +494,44 @@ DECODER(iso2022) charset = STATE_G0; if (charset == CHARSET_ASCII) { -bypass: WRITE1(c) - NEXT(1, 1) - break; - } - - if (dsgcache != NULL && - dsgcache->mark == charset) - dsg = dsgcache; - else { - for (dsg = CONFIG_DESIGNATIONS; - dsg->mark != charset +bypass: + OUTCHAR(c); + NEXT_IN(1); + break; + } + + if (dsgcache != NULL && + dsgcache->mark == charset) + dsg = dsgcache; + else { + for (dsg = CONFIG_DESIGNATIONS; + dsg->mark != charset #ifdef Py_DEBUG - && dsg->mark != '\0' + && dsg->mark != '\0' #endif - ;dsg++) - /* noop */; - assert(dsg->mark != '\0'); - dsgcache = dsg; - } - - REQUIRE_INBUF(dsg->width) - decoded = dsg->decoder(*inbuf); - if (decoded == MAP_UNMAPPABLE) - return dsg->width; - - if (decoded < 0x10000) { - WRITE1(decoded) - NEXT_OUT(1) - } - else if (decoded < 0x30000) { - WRITEUCS4(decoded) - } - else { /* JIS X 0213 pairs */ - WRITE2(decoded >> 16, decoded & 0xffff) - NEXT_OUT(2) + ; dsg++) + { + /* noop */ + } + assert(dsg->mark != '\0'); + dsgcache = dsg; } - NEXT_IN(dsg->width) + + REQUIRE_INBUF(dsg->width); + decoded = dsg->decoder(*inbuf); + if (decoded == MAP_UNMAPPABLE) + return dsg->width; + + if (decoded < 0x10000) { + OUTCHAR(decoded); + } + else if (decoded < 0x30000) { + OUTCHAR(decoded); + } + else { /* JIS X 0213 pairs */ + OUTCHAR2(decoded >> 16, decoded & 0xffff); + } + NEXT_IN(dsg->width); } break; } @@ -577,25 +580,26 @@ ksx1001_init(void) return 0; } -static ucs4_t +static Py_UCS4 ksx1001_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(ksx1001, u, data[0], data[1]) + Py_UCS4 u; + if (TRYMAP_DEC(ksx1001, u, data[0], data[1])) return u; else return MAP_UNMAPPABLE; } static DBCHAR -ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length) +ksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); if (*data < 0x10000) { - TRYMAP_ENC(cp949, coded, *data) + if (TRYMAP_ENC(cp949, coded, *data)) { if (!(coded & 0x8000)) return coded; + } } return MAP_UNMAPPABLE; } @@ -613,27 +617,27 @@ jisx0208_init(void) return 0; } -static ucs4_t +static Py_UCS4 jisx0208_decoder(const unsigned char *data) { - ucs4_t u; + Py_UCS4 u; if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ return 0xff3c; - else TRYMAP_DEC(jisx0208, u, data[0], data[1]) + else if (TRYMAP_DEC(jisx0208, u, data[0], data[1])) return u; else return MAP_UNMAPPABLE; } static DBCHAR -jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); if (*data < 0x10000) { if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */ return 0x2140; - else TRYMAP_ENC(jisxcommon, coded, *data) { + else if (TRYMAP_ENC(jisxcommon, coded, *data)) { if (!(coded & 0x8000)) return coded; } @@ -654,23 +658,23 @@ jisx0212_init(void) return 0; } -static ucs4_t +static Py_UCS4 jisx0212_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(jisx0212, u, data[0], data[1]) + Py_UCS4 u; + if (TRYMAP_DEC(jisx0212, u, data[0], data[1])) return u; else return MAP_UNMAPPABLE; } static DBCHAR -jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); if (*data < 0x10000) { - TRYMAP_ENC(jisxcommon, coded, *data) { + if (TRYMAP_ENC(jisxcommon, coded, *data)) { if (coded & 0x8000) return coded & 0x7fff; } @@ -705,30 +709,34 @@ jisx0213_init(void) } #define config ((void *)2000) -static ucs4_t +static Py_UCS4 jisx0213_2000_1_decoder(const unsigned char *data) { - ucs4_t u; + Py_UCS4 u; EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1]) else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ return 0xff3c; - else TRYMAP_DEC(jisx0208, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) + else if (TRYMAP_DEC(jisx0208, u, data[0], data[1])) + ; + else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1])) + ; + else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])) u |= 0x20000; - else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); + else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1])) + ; else return MAP_UNMAPPABLE; return u; } -static ucs4_t +static Py_UCS4 jisx0213_2000_2_decoder(const unsigned char *data) { - ucs4_t u; - EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1]) - TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) + Py_UCS4 u; + EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1]) + if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1])) + ; + else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])) u |= 0x20000; else return MAP_UNMAPPABLE; @@ -736,28 +744,32 @@ jisx0213_2000_2_decoder(const unsigned char *data) } #undef config -static ucs4_t +static Py_UCS4 jisx0213_2004_1_decoder(const unsigned char *data) { - ucs4_t u; + Py_UCS4 u; if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ return 0xff3c; - else TRYMAP_DEC(jisx0208, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) + else if (TRYMAP_DEC(jisx0208, u, data[0], data[1])) + ; + else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1])) + ; + else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])) u |= 0x20000; - else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); + else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1])) + ; else return MAP_UNMAPPABLE; return u; } -static ucs4_t +static Py_UCS4 jisx0213_2004_2_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) + Py_UCS4 u; + if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1])) + ; + else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])) u |= 0x20000; else return MAP_UNMAPPABLE; @@ -765,7 +777,7 @@ jisx0213_2004_2_decoder(const unsigned char *data) } static DBCHAR -jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) +jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config) { DBCHAR coded; @@ -774,28 +786,28 @@ jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) if (*data >= 0x10000) { if ((*data) >> 16 == 0x20000 >> 16) { EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data) - else TRYMAP_ENC(jisx0213_emp, coded, - (*data) & 0xffff) + else if (TRYMAP_ENC(jisx0213_emp, coded, (*data) & 0xffff)) return coded; } return MAP_UNMAPPABLE; } EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data) - else TRYMAP_ENC(jisx0213_bmp, coded, *data) { + else if (TRYMAP_ENC(jisx0213_bmp, coded, *data)) { if (coded == MULTIC) return MAP_MULTIPLE_AVAIL; } - else TRYMAP_ENC(jisxcommon, coded, *data) { + else if (TRYMAP_ENC(jisxcommon, coded, *data)) { if (coded & 0x8000) return MAP_UNMAPPABLE; } else return MAP_UNMAPPABLE; return coded; + case 2: /* second character of unicode pair */ coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], - jisx0213_pair_encmap, JISX0213_ENCPAIRS); + jisx0213_pair_encmap, JISX0213_ENCPAIRS); if (coded == DBCINV) { *length = 1; coded = find_pairencmap((ucs2_t)data[0], 0, @@ -805,21 +817,24 @@ jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) } else return coded; + case -1: /* flush unterminated */ *length = 1; coded = find_pairencmap((ucs2_t)data[0], 0, - jisx0213_pair_encmap, JISX0213_ENCPAIRS); + jisx0213_pair_encmap, JISX0213_ENCPAIRS); if (coded == DBCINV) return MAP_UNMAPPABLE; else return coded; + break; + default: return MAP_UNMAPPABLE; } } static DBCHAR -jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -831,7 +846,7 @@ jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length) } static DBCHAR -jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; Py_ssize_t ilength = *length; @@ -854,7 +869,7 @@ jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) } static DBCHAR -jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -866,7 +881,7 @@ jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length) } static DBCHAR -jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, NULL); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -878,7 +893,7 @@ jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length) } static DBCHAR -jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; Py_ssize_t ilength = *length; @@ -901,7 +916,7 @@ jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) } static DBCHAR -jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, NULL); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -912,39 +927,43 @@ jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length) return MAP_UNMAPPABLE; } -static ucs4_t +static Py_UCS4 jisx0201_r_decoder(const unsigned char *data) { - ucs4_t u; - JISX0201_R_DECODE(*data, u) - else return MAP_UNMAPPABLE; + Py_UCS4 u; + JISX0201_R_DECODE_CHAR(*data, u) + else + return MAP_UNMAPPABLE; return u; } static DBCHAR -jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; JISX0201_R_ENCODE(*data, coded) - else return MAP_UNMAPPABLE; + else + return MAP_UNMAPPABLE; return coded; } -static ucs4_t +static Py_UCS4 jisx0201_k_decoder(const unsigned char *data) { - ucs4_t u; - JISX0201_K_DECODE(*data ^ 0x80, u) - else return MAP_UNMAPPABLE; + Py_UCS4 u; + JISX0201_K_DECODE_CHAR(*data ^ 0x80, u) + else + return MAP_UNMAPPABLE; return u; } static DBCHAR -jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; JISX0201_K_ENCODE(*data, coded) - else return MAP_UNMAPPABLE; + else + return MAP_UNMAPPABLE; return coded - 0x80; } @@ -961,23 +980,23 @@ gb2312_init(void) return 0; } -static ucs4_t +static Py_UCS4 gb2312_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(gb2312, u, data[0], data[1]) + Py_UCS4 u; + if (TRYMAP_DEC(gb2312, u, data[0], data[1])) return u; else return MAP_UNMAPPABLE; } static DBCHAR -gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) +gb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); if (*data < 0x10000) { - TRYMAP_ENC(gbcommon, coded, *data) { + if (TRYMAP_ENC(gbcommon, coded, *data)) { if (!(coded & 0x8000)) return coded; } @@ -986,14 +1005,14 @@ gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) } -static ucs4_t +static Py_UCS4 dummy_decoder(const unsigned char *data) { return MAP_UNMAPPABLE; } static DBCHAR -dummy_encoder(const ucs4_t *data, Py_ssize_t *length) +dummy_encoder(const Py_UCS4 *data, Py_ssize_t *length) { return MAP_UNMAPPABLE; } |