From 28c63f7ffb9f9cb59c524dc14ce66d34c0e83af6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 Oct 2013 00:59:44 +0100 Subject: CJK codecs: less magical macros, semicolon is now explicit --- Modules/cjkcodecs/_codecs_cn.c | 27 ++++--- Modules/cjkcodecs/_codecs_hk.c | 2 +- Modules/cjkcodecs/_codecs_iso2022.c | 144 ++++++++++++++++++---------------- Modules/cjkcodecs/_codecs_jp.c | 21 ++--- Modules/cjkcodecs/_codecs_kr.c | 15 ++-- Modules/cjkcodecs/_codecs_tw.c | 4 +- Modules/cjkcodecs/cjkcodecs.h | 7 +- Modules/cjkcodecs/emu_jisx0213_2000.h | 2 +- 8 files changed, 118 insertions(+), 104 deletions(-) diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index 5fbcfec..013c3fb 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -40,14 +40,15 @@ OUTCHAR(decoded); \ } -#define GBK_ENCODE(code, assi) \ - if ((code) == 0x2014) { \ - (assi) = 0xa1aa; \ - } else if ((code) == 0x2015) { \ - (assi) = 0xa844; \ - } else if ((code) == 0x00b7) { \ - (assi) = 0xa1a4; \ +#define GBK_ENCODE(code, assi) \ + if ((code) == 0x2014) { \ + (assi) = 0xa1aa; \ + } else if ((code) == 0x2015) { \ + (assi) = 0xa844; \ + } else if ((code) == 0x00b7) { \ + (assi) = 0xa1a4; \ } else if ((code) != 0x30fb && TRYMAP_ENC(gbcommon, assi, code)) { \ + ; \ } /* @@ -98,7 +99,7 @@ DECODER(gb2312) continue; } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); if (TRYMAP_DEC(gb2312, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) { OUTCHAR(decoded); NEXT_IN(2); @@ -159,7 +160,7 @@ DECODER(gbk) continue; } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); GBK_DECODE(c, INBYTE2, writer) else @@ -267,7 +268,7 @@ DECODER(gb18030) continue; } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); c2 = INBYTE2; if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ @@ -275,7 +276,7 @@ DECODER(gb18030) unsigned char c3, c4; Py_UCS4 lseq; - REQUIRE_INBUF(4) + REQUIRE_INBUF(4); c3 = INBYTE3; c4 = INBYTE4; if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) @@ -405,7 +406,7 @@ DECODER(hz) if (c == '~') { unsigned char c2 = INBYTE2; - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); if (c2 == '~') { OUTCHAR('~'); NEXT_IN(2); @@ -431,7 +432,7 @@ DECODER(hz) NEXT_IN(1); } else { /* GB mode */ - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); if (TRYMAP_DEC(gb2312, decoded, c, INBYTE2)) { OUTCHAR(decoded); NEXT_IN(2); diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index 92c468a..b7a7ebd 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -119,7 +119,7 @@ DECODER(big5hkscs) continue; } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) { if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) { diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 3e5fb43..5c401aa 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -73,7 +73,7 @@ #define F_SHIFTED 0x01 #define F_ESCTHROUGHOUT 0x02 -#define STATE_SETG(dn, v) ((state)->c[dn]) = (v); +#define STATE_SETG(dn, v) do { ((state)->c[dn]) = (v); } while (0) #define STATE_GETG(dn) ((state)->c[dn]) #define STATE_G0 STATE_GETG(0) @@ -85,10 +85,10 @@ #define STATE_SETG2(v) STATE_SETG(2, v) #define STATE_SETG3(v) STATE_SETG(3, v) -#define STATE_SETFLAG(f) ((state)->c[4]) |= (f); +#define STATE_SETFLAG(f) do { ((state)->c[4]) |= (f); } while (0) #define STATE_GETFLAG(f) ((state)->c[4] & (f)) -#define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f); -#define STATE_CLEARFLAGS() ((state)->c[4]) = 0; +#define STATE_CLEARFLAG(f) do { ((state)->c[4]) &= ~(f); } while (0) +#define STATE_CLEARFLAGS() do { ((state)->c[4]) = 0; } while (0) #define ISO2022_CONFIG ((const struct iso2022_config *)config) #define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag)) @@ -132,9 +132,9 @@ CODEC_INIT(iso2022) ENCODER_INIT(iso2022) { - STATE_CLEARFLAGS() - STATE_SETG0(CHARSET_ASCII) - STATE_SETG1(CHARSET_ASCII) + STATE_CLEARFLAGS(); + STATE_SETG0(CHARSET_ASCII); + STATE_SETG1(CHARSET_ASCII); return 0; } @@ -143,12 +143,12 @@ ENCODER_RESET(iso2022) if (STATE_GETFLAG(F_SHIFTED)) { WRITEBYTE1(SI); NEXT_OUT(1); - STATE_CLEARFLAG(F_SHIFTED) + STATE_CLEARFLAG(F_SHIFTED); } if (STATE_G0 != CHARSET_ASCII) { WRITEBYTE3(ESC, '(', 'B'); NEXT_OUT(3); - STATE_SETG0(CHARSET_ASCII) + STATE_SETG0(CHARSET_ASCII); } return 0; } @@ -164,12 +164,12 @@ ENCODER(iso2022) if (c < 0x80) { if (STATE_G0 != CHARSET_ASCII) { WRITEBYTE3(ESC, '(', 'B'); - STATE_SETG0(CHARSET_ASCII) + STATE_SETG0(CHARSET_ASCII); NEXT_OUT(3); } if (STATE_GETFLAG(F_SHIFTED)) { WRITEBYTE1(SI); - STATE_CLEARFLAG(F_SHIFTED) + STATE_CLEARFLAG(F_SHIFTED); NEXT_OUT(1); } WRITEBYTE1((unsigned char)c); @@ -211,24 +211,24 @@ ENCODER(iso2022) case 0: /* G0 */ if (STATE_GETFLAG(F_SHIFTED)) { WRITEBYTE1(SI); - STATE_CLEARFLAG(F_SHIFTED) + STATE_CLEARFLAG(F_SHIFTED); NEXT_OUT(1); } if (STATE_G0 != dsg->mark) { if (dsg->width == 1) { WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark)); - STATE_SETG0(dsg->mark) + STATE_SETG0(dsg->mark); NEXT_OUT(3); } else if (dsg->mark == CHARSET_JISX0208) { WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark)); - STATE_SETG0(dsg->mark) + STATE_SETG0(dsg->mark); NEXT_OUT(3); } else { WRITEBYTE4(ESC, '$', '(', ESCMARK(dsg->mark)); - STATE_SETG0(dsg->mark) + STATE_SETG0(dsg->mark); NEXT_OUT(4); } } @@ -237,19 +237,18 @@ ENCODER(iso2022) if (STATE_G1 != dsg->mark) { if (dsg->width == 1) { WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark)); - STATE_SETG1(dsg->mark) + STATE_SETG1(dsg->mark); NEXT_OUT(3); } else { - WRITEBYTE4(ESC, '$', ')', - ESCMARK(dsg->mark)); - STATE_SETG1(dsg->mark) + WRITEBYTE4(ESC, '$', ')', ESCMARK(dsg->mark)); + STATE_SETG1(dsg->mark); NEXT_OUT(4); } } if (!STATE_GETFLAG(F_SHIFTED)) { WRITEBYTE1(SO); - STATE_SETFLAG(F_SHIFTED) + STATE_SETFLAG(F_SHIFTED); NEXT_OUT(1); } break; @@ -274,17 +273,17 @@ ENCODER(iso2022) DECODER_INIT(iso2022) { - STATE_CLEARFLAGS() - STATE_SETG0(CHARSET_ASCII) - STATE_SETG1(CHARSET_ASCII) - STATE_SETG2(CHARSET_ASCII) + STATE_CLEARFLAGS(); + STATE_SETG0(CHARSET_ASCII); + STATE_SETG1(CHARSET_ASCII); + STATE_SETG2(CHARSET_ASCII); return 0; } DECODER_RESET(iso2022) { - STATE_SETG0(CHARSET_ASCII) - STATE_CLEARFLAG(F_SHIFTED) + STATE_SETG0(CHARSET_ASCII); + STATE_CLEARFLAG(F_SHIFTED); return 0; } @@ -303,8 +302,9 @@ iso2022processesc(const void *config, MultibyteCodec_State *state, break; } else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft && - (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') + (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') { i += 2; + } } if (i >= MAX_ESCSEQLEN) @@ -358,14 +358,15 @@ iso2022processesc(const void *config, MultibyteCodec_State *state, if (charset != CHARSET_ASCII) { const struct iso2022_designation *dsg; - for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) + for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { if (dsg->mark == charset) break; + } if (!dsg->mark) return esclen; } - STATE_SETG(designation, charset) + STATE_SETG(designation, charset); *inleft -= esclen; (*inbuf) += esclen; return 0; @@ -433,14 +434,14 @@ DECODER(iso2022) OUTCHAR(c); /* assume as ISO-8859-1 */ NEXT_IN(1); if (IS_ESCEND(c)) { - STATE_CLEARFLAG(F_ESCTHROUGHOUT) + STATE_CLEARFLAG(F_ESCTHROUGHOUT); } continue; } switch (c) { case ESC: - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); if (IS_ISO2022ESC(INBYTE2)) { err = iso2022processesc(config, state, inbuf, &inleft); @@ -448,7 +449,7 @@ DECODER(iso2022) return err; } else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */ - REQUIRE_INBUF(3) + REQUIRE_INBUF(3); err = iso2022processg2(config, state, inbuf, &inleft, writer); if (err != 0) @@ -456,24 +457,24 @@ DECODER(iso2022) } else { OUTCHAR(ESC); - STATE_SETFLAG(F_ESCTHROUGHOUT) + STATE_SETFLAG(F_ESCTHROUGHOUT); NEXT_IN(1); } break; case SI: if (CONFIG_ISSET(NO_SHIFT)) goto bypass; - STATE_CLEARFLAG(F_SHIFTED) + STATE_CLEARFLAG(F_SHIFTED); NEXT_IN(1); break; case SO: if (CONFIG_ISSET(NO_SHIFT)) goto bypass; - STATE_SETFLAG(F_SHIFTED) + STATE_SETFLAG(F_SHIFTED); NEXT_IN(1); break; case LF: - STATE_CLEARFLAG(F_SHIFTED) + STATE_CLEARFLAG(F_SHIFTED); OUTCHAR(LF); NEXT_IN(1); break; @@ -493,38 +494,41 @@ DECODER(iso2022) charset = STATE_G0; if (charset == CHARSET_ASCII) { -bypass: OUTCHAR(c); - NEXT_IN(1); - break; - } - - if (dsgcache != NULL && - dsgcache->mark == charset) - dsg = dsgcache; - else { - for (dsg = CONFIG_DESIGNATIONS; - dsg->mark != charset +bypass: + OUTCHAR(c); + NEXT_IN(1); + break; + } + + if (dsgcache != NULL && + dsgcache->mark == charset) + dsg = dsgcache; + else { + for (dsg = CONFIG_DESIGNATIONS; + dsg->mark != charset #ifdef Py_DEBUG - && dsg->mark != '\0' + && dsg->mark != '\0' #endif - ;dsg++) - /* noop */; - assert(dsg->mark != '\0'); - dsgcache = dsg; - } - - REQUIRE_INBUF(dsg->width) - decoded = dsg->decoder(*inbuf); - if (decoded == MAP_UNMAPPABLE) - return dsg->width; - - if (decoded < 0x10000) { - OUTCHAR(decoded); - } - else if (decoded < 0x30000) { - OUTCHAR(decoded); - } - else { /* JIS X 0213 pairs */ + ; dsg++) + { + /* noop */ + } + assert(dsg->mark != '\0'); + dsgcache = dsg; + } + + REQUIRE_INBUF(dsg->width); + decoded = dsg->decoder(*inbuf); + if (decoded == MAP_UNMAPPABLE) + return dsg->width; + + if (decoded < 0x10000) { + OUTCHAR(decoded); + } + else if (decoded < 0x30000) { + OUTCHAR(decoded); + } + else { /* JIS X 0213 pairs */ OUTCHAR2(decoded >> 16, decoded & 0xffff); } NEXT_IN(dsg->width); @@ -800,9 +804,10 @@ jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config) else return MAP_UNMAPPABLE; return coded; + case 2: /* second character of unicode pair */ coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], - jisx0213_pair_encmap, JISX0213_ENCPAIRS); + jisx0213_pair_encmap, JISX0213_ENCPAIRS); if (coded == DBCINV) { *length = 1; coded = find_pairencmap((ucs2_t)data[0], 0, @@ -812,14 +817,17 @@ jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config) } else return coded; + case -1: /* flush unterminated */ *length = 1; coded = find_pairencmap((ucs2_t)data[0], 0, - jisx0213_pair_encmap, JISX0213_ENCPAIRS); + jisx0213_pair_encmap, JISX0213_ENCPAIRS); if (coded == DBCINV) return MAP_UNMAPPABLE; else return coded; + break; + default: return MAP_UNMAPPABLE; } diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index 447bf77..2c7788a 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -107,7 +107,7 @@ DECODER(cp932) continue; } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); c2 = INBYTE2; if (TRYMAP_DEC(cp932ext, decoded, c, c2)) @@ -254,7 +254,7 @@ DECODER(euc_jis_2004) /* JIS X 0201 half-width katakana */ unsigned char c2; - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); c2 = INBYTE2; if (c2 >= 0xa1 && c2 <= 0xdf) { OUTCHAR(0xfec0 + c2); @@ -266,7 +266,7 @@ DECODER(euc_jis_2004) else if (c == 0x8f) { unsigned char c2, c3; - REQUIRE_INBUF(3) + REQUIRE_INBUF(3); c2 = INBYTE2 ^ 0x80; c3 = INBYTE3 ^ 0x80; @@ -288,7 +288,7 @@ DECODER(euc_jis_2004) else { unsigned char c2; - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); c ^= 0x80; c2 = INBYTE2 ^ 0x80; @@ -395,7 +395,7 @@ DECODER(euc_jp) /* JIS X 0201 half-width katakana */ unsigned char c2; - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); c2 = INBYTE2; if (c2 >= 0xa1 && c2 <= 0xdf) { OUTCHAR(0xfec0 + c2); @@ -407,7 +407,7 @@ DECODER(euc_jp) else if (c == 0x8f) { unsigned char c2, c3; - REQUIRE_INBUF(3) + REQUIRE_INBUF(3); c2 = INBYTE2; c3 = INBYTE3; /* JIS X 0212 */ @@ -421,7 +421,7 @@ DECODER(euc_jp) else { unsigned char c2; - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); c2 = INBYTE2; /* JIS X 0208 */ #ifndef STRICT_BUILD @@ -521,7 +521,7 @@ DECODER(shift_jis) else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ unsigned char c1, c2; - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); c2 = INBYTE2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) return 1; @@ -638,7 +638,8 @@ ENCODER(shift_jis_2004) c1 = code >> 8; c2 = (code & 0xff) - 0x21; - if (c1 & 0x80) { /* Plane 2 */ + if (c1 & 0x80) { + /* Plane 2 */ if (c1 >= 0xee) c1 -= 0x87; else if (c1 >= 0xac || c1 == 0xa8) @@ -673,7 +674,7 @@ DECODER(shift_jis_2004) unsigned char c1, c2; Py_UCS4 code, decoded; - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); c2 = INBYTE2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) return 1; diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c index 6a3a1f7..1ad41a7 100644 --- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -58,9 +58,10 @@ ENCODER(euc_kr) OUTBYTE2((code & 0xFF) | 0x80); NEXT(1, 2); } - else { /* Mapping is found in CP949 extension, - * but we encode it in KS X 1001:1998 Annex 3, - * make-up sequence for EUC-KR. */ + else { + /* Mapping is found in CP949 extension, + but we encode it in KS X 1001:1998 Annex 3, + make-up sequence for EUC-KR. */ REQUIRE_OUTBUF(8); @@ -115,14 +116,14 @@ DECODER(euc_kr) continue; } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); if (c == EUCKR_JAMO_FIRSTBYTE && INBYTE2 == EUCKR_JAMO_FILLER) { /* KS X 1001:1998 Annex 3 make-up sequence */ DBCHAR cho, jung, jong; - REQUIRE_INBUF(8) + REQUIRE_INBUF(8); if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE) @@ -212,7 +213,7 @@ DECODER(cp949) continue; } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); if (TRYMAP_DEC(ksx1001, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) OUTCHAR(decoded); else if (TRYMAP_DEC(cp949ext, decoded, c, INBYTE2)) @@ -369,7 +370,7 @@ DECODER(johab) continue; } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); c2 = INBYTE2; if (c < 0xd8) { diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c index 016770c..722b26b 100644 --- a/Modules/cjkcodecs/_codecs_tw.c +++ b/Modules/cjkcodecs/_codecs_tw.c @@ -54,7 +54,7 @@ DECODER(big5) continue; } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) { OUTCHAR(decoded); NEXT_IN(2); @@ -113,7 +113,7 @@ DECODER(cp950) continue; } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2); if (TRYMAP_DEC(cp950ext, decoded, c, INBYTE2)) OUTCHAR(decoded); diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index 2aedf25..25bab41 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -113,8 +113,11 @@ static const struct dbcs_map *mapping_list; } while (0) #define REQUIRE_INBUF(n) \ - if (inleft < (n)) \ - return MBERR_TOOFEW; + do { \ + if (inleft < (n)) \ + return MBERR_TOOFEW; \ + } while (0) + #define REQUIRE_OUTBUF(n) \ do { \ if (outleft < (n)) \ diff --git a/Modules/cjkcodecs/emu_jisx0213_2000.h b/Modules/cjkcodecs/emu_jisx0213_2000.h index 672eb06..a5d5a70 100644 --- a/Modules/cjkcodecs/emu_jisx0213_2000.h +++ b/Modules/cjkcodecs/emu_jisx0213_2000.h @@ -2,7 +2,7 @@ * standards. */ #ifndef EMULATE_JISX0213_2000_ENCODE_INVALID -#define EMULATE_JISX0213_2000_ENCODE_INVALID 1 +# define EMULATE_JISX0213_2000_ENCODE_INVALID 1 #endif #define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \ -- cgit v0.12