diff options
Diffstat (limited to 'Modules/cjkcodecs')
-rw-r--r-- | Modules/cjkcodecs/_codecs_cn.c | 634 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_hk.c | 256 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_iso2022.c | 1622 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_jp.c | 1204 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_kr.c | 602 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_tw.c | 140 | ||||
-rw-r--r-- | Modules/cjkcodecs/alg_jisx0201.h | 46 | ||||
-rw-r--r-- | Modules/cjkcodecs/cjkcodecs.h | 578 | ||||
-rw-r--r-- | Modules/cjkcodecs/emu_jisx0213_2000.h | 58 | ||||
-rw-r--r-- | Modules/cjkcodecs/multibytecodec.c | 2870 | ||||
-rw-r--r-- | Modules/cjkcodecs/multibytecodec.h | 132 |
11 files changed, 4071 insertions, 4071 deletions
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index 4542ce6..ab4e659 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -17,24 +17,24 @@ /* GBK and GB2312 map differently in few codepoints that are listed below: * - * gb2312 gbk - * A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT - * A1AA U+2015 HORIZONTAL BAR U+2014 EM DASH - * A844 undefined U+2015 HORIZONTAL BAR + * gb2312 gbk + * A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT + * A1AA U+2015 HORIZONTAL BAR U+2014 EM DASH + * A844 undefined U+2015 HORIZONTAL BAR */ #define GBK_DECODE(dc1, dc2, assi) \ - if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \ - else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \ - else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \ - else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \ - else TRYMAP_DEC(gbkext, assi, dc1, dc2); + if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \ + else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \ + else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \ + else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \ + else TRYMAP_DEC(gbkext, assi, dc1, dc2); #define GBK_ENCODE(code, assi) \ - if ((code) == 0x2014) (assi) = 0xa1aa; \ - else if ((code) == 0x2015) (assi) = 0xa844; \ - else if ((code) == 0x00b7) (assi) = 0xa1a4; \ - else if ((code) != 0x30fb && TRYMAP_ENC_COND(gbcommon, assi, code)); + if ((code) == 0x2014) (assi) = 0xa1aa; \ + else if ((code) == 0x2015) (assi) = 0xa844; \ + else if ((code) == 0x00b7) (assi) = 0xa1a4; \ + else if ((code) != 0x30fb && TRYMAP_ENC_COND(gbcommon, assi, code)); /* * GB2312 codec @@ -42,53 +42,53 @@ ENCODER(gb2312) { - while (inleft > 0) { - Py_UNICODE c = IN1; - DBCHAR code; - - if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) - continue; - } - UCS4INVALID(c) - - REQUIRE_OUTBUF(2) - TRYMAP_ENC(gbcommon, code, c); - else return 1; - - if (code & 0x8000) /* MSB set: GBK */ - return 1; - - OUT1((code >> 8) | 0x80) - OUT2((code & 0xFF) | 0x80) - NEXT(1, 2) - } - - return 0; + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + + if (c < 0x80) { + WRITE1((unsigned char)c) + NEXT(1, 1) + continue; + } + UCS4INVALID(c) + + REQUIRE_OUTBUF(2) + TRYMAP_ENC(gbcommon, code, c); + else return 1; + + if (code & 0x8000) /* MSB set: GBK */ + return 1; + + OUT1((code >> 8) | 0x80) + OUT2((code & 0xFF) | 0x80) + NEXT(1, 2) + } + + return 0; } DECODER(gb2312) { - while (inleft > 0) { - unsigned char c = **inbuf; + while (inleft > 0) { + unsigned char c = **inbuf; - REQUIRE_OUTBUF(1) + REQUIRE_OUTBUF(1) - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } - REQUIRE_INBUF(2) - TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) { - NEXT(2, 1) - } - else return 2; - } + REQUIRE_INBUF(2) + TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) { + NEXT(2, 1) + } + else return 2; + } - return 0; + return 0; } @@ -98,55 +98,55 @@ DECODER(gb2312) ENCODER(gbk) { - while (inleft > 0) { - Py_UNICODE c = IN1; - DBCHAR code; - - if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) - continue; - } - UCS4INVALID(c) - - REQUIRE_OUTBUF(2) - - GBK_ENCODE(c, code) - else return 1; - - OUT1((code >> 8) | 0x80) - if (code & 0x8000) - OUT2((code & 0xFF)) /* MSB set: GBK */ - else - OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ - NEXT(1, 2) - } - - return 0; + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + + if (c < 0x80) { + WRITE1((unsigned char)c) + NEXT(1, 1) + continue; + } + UCS4INVALID(c) + + REQUIRE_OUTBUF(2) + + GBK_ENCODE(c, code) + else return 1; + + OUT1((code >> 8) | 0x80) + if (code & 0x8000) + OUT2((code & 0xFF)) /* MSB set: GBK */ + else + OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ + NEXT(1, 2) + } + + return 0; } DECODER(gbk) { - while (inleft > 0) { - unsigned char c = IN1; + while (inleft > 0) { + unsigned char c = IN1; - REQUIRE_OUTBUF(1) + REQUIRE_OUTBUF(1) - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2) - GBK_DECODE(c, IN2, **outbuf) - else return 2; + GBK_DECODE(c, IN2, **outbuf) + else return 2; - NEXT(2, 1) - } + NEXT(2, 1) + } - return 0; + return 0; } @@ -156,153 +156,153 @@ DECODER(gbk) ENCODER(gb18030) { - while (inleft > 0) { - ucs4_t c = IN1; - DBCHAR code; - - if (c < 0x80) { - WRITE1(c) - NEXT(1, 1) - continue; - } - - DECODE_SURROGATE(c) - if (c > 0x10FFFF) + while (inleft > 0) { + ucs4_t c = IN1; + DBCHAR code; + + if (c < 0x80) { + WRITE1(c) + NEXT(1, 1) + continue; + } + + DECODE_SURROGATE(c) + if (c > 0x10FFFF) #if Py_UNICODE_SIZE == 2 - return 2; /* surrogates pair */ + return 2; /* surrogates pair */ #else - return 1; + return 1; #endif - else if (c >= 0x10000) { - ucs4_t tc = c - 0x10000; + else if (c >= 0x10000) { + ucs4_t tc = c - 0x10000; - REQUIRE_OUTBUF(4) + REQUIRE_OUTBUF(4) - OUT4((unsigned char)(tc % 10) + 0x30) - tc /= 10; - OUT3((unsigned char)(tc % 126) + 0x81) - tc /= 126; - OUT2((unsigned char)(tc % 10) + 0x30) - tc /= 10; - OUT1((unsigned char)(tc + 0x90)) + OUT4((unsigned char)(tc % 10) + 0x30) + tc /= 10; + OUT3((unsigned char)(tc % 126) + 0x81) + tc /= 126; + OUT2((unsigned char)(tc % 10) + 0x30) + tc /= 10; + OUT1((unsigned char)(tc + 0x90)) #if Py_UNICODE_SIZE == 2 - NEXT(2, 4) /* surrogates pair */ + NEXT(2, 4) /* surrogates pair */ #else - NEXT(1, 4) + NEXT(1, 4) #endif - continue; - } - - REQUIRE_OUTBUF(2) - - GBK_ENCODE(c, code) - else TRYMAP_ENC(gb18030ext, code, c); - else { - const struct _gb18030_to_unibmp_ranges *utrrange; - - REQUIRE_OUTBUF(4) - - for (utrrange = gb18030_to_unibmp_ranges; - utrrange->first != 0; - utrrange++) - if (utrrange->first <= c && - c <= utrrange->last) { - Py_UNICODE tc; - - tc = c - utrrange->first + - utrrange->base; - - OUT4((unsigned char)(tc % 10) + 0x30) - tc /= 10; - OUT3((unsigned char)(tc % 126) + 0x81) - tc /= 126; - OUT2((unsigned char)(tc % 10) + 0x30) - tc /= 10; - OUT1((unsigned char)tc + 0x81) - - NEXT(1, 4) - break; - } - - if (utrrange->first == 0) - return 1; - continue; - } - - OUT1((code >> 8) | 0x80) - if (code & 0x8000) - OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */ - else - OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ - - NEXT(1, 2) - } - - return 0; + continue; + } + + REQUIRE_OUTBUF(2) + + GBK_ENCODE(c, code) + else TRYMAP_ENC(gb18030ext, code, c); + else { + const struct _gb18030_to_unibmp_ranges *utrrange; + + REQUIRE_OUTBUF(4) + + for (utrrange = gb18030_to_unibmp_ranges; + utrrange->first != 0; + utrrange++) + if (utrrange->first <= c && + c <= utrrange->last) { + Py_UNICODE tc; + + tc = c - utrrange->first + + utrrange->base; + + OUT4((unsigned char)(tc % 10) + 0x30) + tc /= 10; + OUT3((unsigned char)(tc % 126) + 0x81) + tc /= 126; + OUT2((unsigned char)(tc % 10) + 0x30) + tc /= 10; + OUT1((unsigned char)tc + 0x81) + + NEXT(1, 4) + break; + } + + if (utrrange->first == 0) + return 1; + continue; + } + + OUT1((code >> 8) | 0x80) + if (code & 0x8000) + OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */ + else + OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ + + NEXT(1, 2) + } + + return 0; } DECODER(gb18030) { - while (inleft > 0) { - unsigned char c = IN1, c2; - - REQUIRE_OUTBUF(1) - - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } - - REQUIRE_INBUF(2) - - c2 = IN2; - if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ - const struct _gb18030_to_unibmp_ranges *utr; - unsigned char c3, c4; - ucs4_t lseq; - - REQUIRE_INBUF(4) - c3 = IN3; - c4 = IN4; - if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) - return 4; - c -= 0x81; c2 -= 0x30; - c3 -= 0x81; c4 -= 0x30; - - if (c < 4) { /* U+0080 - U+FFFF */ - lseq = ((ucs4_t)c * 10 + c2) * 1260 + - (ucs4_t)c3 * 10 + c4; - if (lseq < 39420) { - for (utr = gb18030_to_unibmp_ranges; - lseq >= (utr + 1)->base; - utr++) ; - OUT1(utr->first - utr->base + lseq) - NEXT(4, 1) - continue; - } - } - else if (c >= 15) { /* U+10000 - U+10FFFF */ - lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2) - * 1260 + (ucs4_t)c3 * 10 + c4; - if (lseq <= 0x10FFFF) { - WRITEUCS4(lseq); - NEXT_IN(4) - continue; - } - } - return 4; - } - - GBK_DECODE(c, c2, **outbuf) - else TRYMAP_DEC(gb18030ext, **outbuf, c, c2); - else return 2; - - NEXT(2, 1) - } - - return 0; + while (inleft > 0) { + unsigned char c = IN1, c2; + + REQUIRE_OUTBUF(1) + + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } + + REQUIRE_INBUF(2) + + c2 = IN2; + if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ + const struct _gb18030_to_unibmp_ranges *utr; + unsigned char c3, c4; + ucs4_t lseq; + + REQUIRE_INBUF(4) + c3 = IN3; + c4 = IN4; + if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) + return 4; + c -= 0x81; c2 -= 0x30; + c3 -= 0x81; c4 -= 0x30; + + if (c < 4) { /* U+0080 - U+FFFF */ + lseq = ((ucs4_t)c * 10 + c2) * 1260 + + (ucs4_t)c3 * 10 + c4; + if (lseq < 39420) { + for (utr = gb18030_to_unibmp_ranges; + lseq >= (utr + 1)->base; + utr++) ; + OUT1(utr->first - utr->base + lseq) + NEXT(4, 1) + continue; + } + } + else if (c >= 15) { /* U+10000 - U+10FFFF */ + lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2) + * 1260 + (ucs4_t)c3 * 10 + c4; + if (lseq <= 0x10FFFF) { + WRITEUCS4(lseq); + NEXT_IN(4) + continue; + } + } + return 4; + } + + GBK_DECODE(c, c2, **outbuf) + else TRYMAP_DEC(gb18030ext, **outbuf, c, c2); + else return 2; + + NEXT(2, 1) + } + + return 0; } @@ -312,118 +312,118 @@ DECODER(gb18030) ENCODER_INIT(hz) { - state->i = 0; - return 0; + state->i = 0; + return 0; } ENCODER_RESET(hz) { - if (state->i != 0) { - WRITE2('~', '}') - state->i = 0; - NEXT_OUT(2) - } - return 0; + if (state->i != 0) { + WRITE2('~', '}') + state->i = 0; + NEXT_OUT(2) + } + return 0; } ENCODER(hz) { - while (inleft > 0) { - Py_UNICODE c = IN1; - DBCHAR code; - - if (c < 0x80) { - if (state->i == 0) { - WRITE1((unsigned char)c) - NEXT(1, 1) - } - else { - WRITE3('~', '}', (unsigned char)c) - NEXT(1, 3) - state->i = 0; - } - continue; - } - - UCS4INVALID(c) - - TRYMAP_ENC(gbcommon, code, c); - else return 1; - - if (code & 0x8000) /* MSB set: GBK */ - return 1; - - if (state->i == 0) { - WRITE4('~', '{', code >> 8, code & 0xff) - NEXT(1, 4) - state->i = 1; - } - else { - WRITE2(code >> 8, code & 0xff) - NEXT(1, 2) - } - } - - return 0; + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + + if (c < 0x80) { + if (state->i == 0) { + WRITE1((unsigned char)c) + NEXT(1, 1) + } + else { + WRITE3('~', '}', (unsigned char)c) + NEXT(1, 3) + state->i = 0; + } + continue; + } + + UCS4INVALID(c) + + TRYMAP_ENC(gbcommon, code, c); + else return 1; + + if (code & 0x8000) /* MSB set: GBK */ + return 1; + + if (state->i == 0) { + WRITE4('~', '{', code >> 8, code & 0xff) + NEXT(1, 4) + state->i = 1; + } + else { + WRITE2(code >> 8, code & 0xff) + NEXT(1, 2) + } + } + + return 0; } DECODER_INIT(hz) { - state->i = 0; - return 0; + state->i = 0; + return 0; } DECODER_RESET(hz) { - state->i = 0; - return 0; + state->i = 0; + return 0; } DECODER(hz) { - while (inleft > 0) { - unsigned char c = IN1; - - if (c == '~') { - unsigned char c2 = IN2; - - REQUIRE_INBUF(2) - if (c2 == '~') { - WRITE1('~') - NEXT(2, 1) - continue; - } - else if (c2 == '{' && state->i == 0) - state->i = 1; /* set GB */ - else if (c2 == '}' && state->i == 1) - state->i = 0; /* set ASCII */ - else if (c2 == '\n') - ; /* line-continuation */ - else - return 2; - NEXT(2, 0); - continue; - } - - if (c & 0x80) - return 1; - - if (state->i == 0) { /* ASCII mode */ - WRITE1(c) - NEXT(1, 1) - } - else { /* GB mode */ - REQUIRE_INBUF(2) - REQUIRE_OUTBUF(1) - TRYMAP_DEC(gb2312, **outbuf, c, IN2) { - NEXT(2, 1) - } - else - return 2; - } - } - - return 0; + while (inleft > 0) { + unsigned char c = IN1; + + if (c == '~') { + unsigned char c2 = IN2; + + REQUIRE_INBUF(2) + if (c2 == '~') { + WRITE1('~') + NEXT(2, 1) + continue; + } + else if (c2 == '{' && state->i == 0) + state->i = 1; /* set GB */ + else if (c2 == '}' && state->i == 1) + state->i = 0; /* set ASCII */ + else if (c2 == '\n') + ; /* line-continuation */ + else + return 2; + NEXT(2, 0); + continue; + } + + if (c & 0x80) + return 1; + + if (state->i == 0) { /* ASCII mode */ + WRITE1(c) + NEXT(1, 1) + } + else { /* GB mode */ + REQUIRE_INBUF(2) + REQUIRE_OUTBUF(1) + TRYMAP_DEC(gb2312, **outbuf, c, IN2) { + NEXT(2, 1) + } + else + return 2; + } + } + + return 0; } diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index 4bbd622..aaf103d 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -18,12 +18,12 @@ static const decode_map *big5_decmap = NULL; CODEC_INIT(big5hkscs) { - static int initialized = 0; + static int initialized = 0; - if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap)) - return -1; - initialized = 1; - return 0; + if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap)) + return -1; + initialized = 1; + return 0; } /* @@ -38,135 +38,135 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5 ENCODER(big5hkscs) { - while (inleft > 0) { - ucs4_t c = **inbuf; - DBCHAR code; - Py_ssize_t insize; - - if (c < 0x80) { - REQUIRE_OUTBUF(1) - **outbuf = (unsigned char)c; - NEXT(1, 1) - continue; - } - - DECODE_SURROGATE(c) - insize = GET_INSIZE(c); - - REQUIRE_OUTBUF(2) - - if (c < 0x10000) { - TRYMAP_ENC(big5hkscs_bmp, code, c) { - if (code == MULTIC) { - if (inleft >= 2 && - ((c & 0xffdf) == 0x00ca) && - (((*inbuf)[1] & 0xfff7) == 0x0304)) { - code = big5hkscs_pairenc_table[ - ((c >> 4) | - ((*inbuf)[1] >> 3)) & 3]; - insize = 2; - } - else if (inleft < 2 && - !(flags & MBENC_FLUSH)) - return MBERR_TOOFEW; - else { - if (c == 0xca) - code = 0x8866; - else /* c == 0xea */ - code = 0x88a7; - } - } - } - else TRYMAP_ENC(big5, code, c); - else return 1; - } - else if (c < 0x20000) - return insize; - else if (c < 0x30000) { - TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff); - else return insize; - } - else - return insize; - - OUT1(code >> 8) - OUT2(code & 0xFF) - NEXT(insize, 2) - } - - return 0; + while (inleft > 0) { + ucs4_t c = **inbuf; + DBCHAR code; + Py_ssize_t insize; + + if (c < 0x80) { + REQUIRE_OUTBUF(1) + **outbuf = (unsigned char)c; + NEXT(1, 1) + continue; + } + + DECODE_SURROGATE(c) + insize = GET_INSIZE(c); + + REQUIRE_OUTBUF(2) + + if (c < 0x10000) { + TRYMAP_ENC(big5hkscs_bmp, code, c) { + if (code == MULTIC) { + if (inleft >= 2 && + ((c & 0xffdf) == 0x00ca) && + (((*inbuf)[1] & 0xfff7) == 0x0304)) { + code = big5hkscs_pairenc_table[ + ((c >> 4) | + ((*inbuf)[1] >> 3)) & 3]; + insize = 2; + } + else if (inleft < 2 && + !(flags & MBENC_FLUSH)) + return MBERR_TOOFEW; + else { + if (c == 0xca) + code = 0x8866; + else /* c == 0xea */ + code = 0x88a7; + } + } + } + else TRYMAP_ENC(big5, code, c); + else return 1; + } + else if (c < 0x20000) + return insize; + else if (c < 0x30000) { + TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff); + else return insize; + } + else + return insize; + + OUT1(code >> 8) + OUT2(code & 0xFF) + NEXT(insize, 2) + } + + return 0; } #define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40)) DECODER(big5hkscs) { - while (inleft > 0) { - unsigned char c = IN1; - ucs4_t decoded; - - REQUIRE_OUTBUF(1) - - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } - - REQUIRE_INBUF(2) - - if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1)) - goto hkscsdec; - - TRYMAP_DEC(big5, **outbuf, c, IN2) { - NEXT(2, 1) - } - else -hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) { - int s = BH2S(c, IN2); - const unsigned char *hintbase; - - assert(0x87 <= c && c <= 0xfe); - assert(0x40 <= IN2 && IN2 <= 0xfe); - - if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { - hintbase = big5hkscs_phint_0; - s -= BH2S(0x87, 0x40); - } - else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ - hintbase = big5hkscs_phint_12130; - s -= BH2S(0xc6, 0xa1); - } - else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ - hintbase = big5hkscs_phint_21924; - s -= BH2S(0xf9, 0xd6); - } - else - return MBERR_INTERNAL; - - if (hintbase[s >> 3] & (1 << (s & 7))) { - WRITEUCS4(decoded | 0x20000) - NEXT_IN(2) - } - else { - OUT1(decoded) - NEXT(2, 1) - } - } - else { - switch ((c << 8) | IN2) { - case 0x8862: WRITE2(0x00ca, 0x0304); break; - case 0x8864: WRITE2(0x00ca, 0x030c); break; - case 0x88a3: WRITE2(0x00ea, 0x0304); break; - case 0x88a5: WRITE2(0x00ea, 0x030c); break; - default: return 2; - } - - NEXT(2, 2) /* all decoded codepoints are pairs, above. */ - } - } - - return 0; + while (inleft > 0) { + unsigned char c = IN1; + ucs4_t decoded; + + REQUIRE_OUTBUF(1) + + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } + + REQUIRE_INBUF(2) + + if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1)) + goto hkscsdec; + + TRYMAP_DEC(big5, **outbuf, c, IN2) { + NEXT(2, 1) + } + else +hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) { + int s = BH2S(c, IN2); + const unsigned char *hintbase; + + assert(0x87 <= c && c <= 0xfe); + assert(0x40 <= IN2 && IN2 <= 0xfe); + + if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { + hintbase = big5hkscs_phint_0; + s -= BH2S(0x87, 0x40); + } + else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ + hintbase = big5hkscs_phint_12130; + s -= BH2S(0xc6, 0xa1); + } + else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ + hintbase = big5hkscs_phint_21924; + s -= BH2S(0xf9, 0xd6); + } + else + return MBERR_INTERNAL; + + if (hintbase[s >> 3] & (1 << (s & 7))) { + WRITEUCS4(decoded | 0x20000) + NEXT_IN(2) + } + else { + OUT1(decoded) + NEXT(2, 1) + } + } + else { + switch ((c << 8) | IN2) { + case 0x8862: WRITE2(0x00ca, 0x0304); break; + case 0x8864: WRITE2(0x00ca, 0x030c); break; + case 0x88a3: WRITE2(0x00ea, 0x0304); break; + case 0x88a5: WRITE2(0x00ea, 0x030c); break; + default: return 2; + } + + NEXT(2, 2) /* all decoded codepoints are pairs, above. */ + } + } + + return 0; } diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 9ce7b75..25c1a36 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -19,85 +19,85 @@ state->c[0-3] - 00000000 - ||^^^^^| - |+-----+---- G0-3 Character Set - +----------- Is G0-3 double byte? + 00000000 + ||^^^^^| + |+-----+---- G0-3 Character Set + +----------- Is G0-3 double byte? state->c[4] - 00000000 - || - |+---- Locked-Shift? - +----- ESC Throughout + 00000000 + || + |+---- Locked-Shift? + +----- ESC Throughout */ -#define ESC 0x1B -#define SO 0x0E -#define SI 0x0F -#define LF 0x0A - -#define MAX_ESCSEQLEN 16 - -#define CHARSET_ISO8859_1 'A' -#define CHARSET_ASCII 'B' -#define CHARSET_ISO8859_7 'F' -#define CHARSET_JISX0201_K 'I' -#define CHARSET_JISX0201_R 'J' - -#define CHARSET_GB2312 ('A'|CHARSET_DBCS) -#define CHARSET_JISX0208 ('B'|CHARSET_DBCS) -#define CHARSET_KSX1001 ('C'|CHARSET_DBCS) -#define CHARSET_JISX0212 ('D'|CHARSET_DBCS) -#define CHARSET_GB2312_8565 ('E'|CHARSET_DBCS) -#define CHARSET_CNS11643_1 ('G'|CHARSET_DBCS) -#define CHARSET_CNS11643_2 ('H'|CHARSET_DBCS) -#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS) -#define CHARSET_JISX0213_2 ('P'|CHARSET_DBCS) -#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS) -#define CHARSET_JISX0208_O ('@'|CHARSET_DBCS) - -#define CHARSET_DBCS 0x80 -#define ESCMARK(mark) ((mark) & 0x7f) - -#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@') +#define ESC 0x1B +#define SO 0x0E +#define SI 0x0F +#define LF 0x0A + +#define MAX_ESCSEQLEN 16 + +#define CHARSET_ISO8859_1 'A' +#define CHARSET_ASCII 'B' +#define CHARSET_ISO8859_7 'F' +#define CHARSET_JISX0201_K 'I' +#define CHARSET_JISX0201_R 'J' + +#define CHARSET_GB2312 ('A'|CHARSET_DBCS) +#define CHARSET_JISX0208 ('B'|CHARSET_DBCS) +#define CHARSET_KSX1001 ('C'|CHARSET_DBCS) +#define CHARSET_JISX0212 ('D'|CHARSET_DBCS) +#define CHARSET_GB2312_8565 ('E'|CHARSET_DBCS) +#define CHARSET_CNS11643_1 ('G'|CHARSET_DBCS) +#define CHARSET_CNS11643_2 ('H'|CHARSET_DBCS) +#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS) +#define CHARSET_JISX0213_2 ('P'|CHARSET_DBCS) +#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS) +#define CHARSET_JISX0208_O ('@'|CHARSET_DBCS) + +#define CHARSET_DBCS 0x80 +#define ESCMARK(mark) ((mark) & 0x7f) + +#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@') #define IS_ISO2022ESC(c2) \ - ((c2) == '(' || (c2) == ')' || (c2) == '$' || \ - (c2) == '.' || (c2) == '&') - /* this is not a complete list of ISO-2022 escape sequence headers. - * but, it's enough to implement CJK instances of iso-2022. */ - -#define MAP_UNMAPPABLE 0xFFFF -#define MAP_MULTIPLE_AVAIL 0xFFFE /* for JIS X 0213 */ - -#define F_SHIFTED 0x01 -#define F_ESCTHROUGHOUT 0x02 - -#define STATE_SETG(dn, v) ((state)->c[dn]) = (v); -#define STATE_GETG(dn) ((state)->c[dn]) - -#define STATE_G0 STATE_GETG(0) -#define STATE_G1 STATE_GETG(1) -#define STATE_G2 STATE_GETG(2) -#define STATE_G3 STATE_GETG(3) -#define STATE_SETG0(v) STATE_SETG(0, v) -#define STATE_SETG1(v) STATE_SETG(1, v) -#define STATE_SETG2(v) STATE_SETG(2, v) -#define STATE_SETG3(v) STATE_SETG(3, v) - -#define STATE_SETFLAG(f) ((state)->c[4]) |= (f); -#define STATE_GETFLAG(f) ((state)->c[4] & (f)) -#define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f); -#define STATE_CLEARFLAGS() ((state)->c[4]) = 0; - -#define ISO2022_CONFIG ((const struct iso2022_config *)config) -#define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag)) -#define CONFIG_DESIGNATIONS (ISO2022_CONFIG->designations) + ((c2) == '(' || (c2) == ')' || (c2) == '$' || \ + (c2) == '.' || (c2) == '&') + /* this is not a complete list of ISO-2022 escape sequence headers. + * but, it's enough to implement CJK instances of iso-2022. */ + +#define MAP_UNMAPPABLE 0xFFFF +#define MAP_MULTIPLE_AVAIL 0xFFFE /* for JIS X 0213 */ + +#define F_SHIFTED 0x01 +#define F_ESCTHROUGHOUT 0x02 + +#define STATE_SETG(dn, v) ((state)->c[dn]) = (v); +#define STATE_GETG(dn) ((state)->c[dn]) + +#define STATE_G0 STATE_GETG(0) +#define STATE_G1 STATE_GETG(1) +#define STATE_G2 STATE_GETG(2) +#define STATE_G3 STATE_GETG(3) +#define STATE_SETG0(v) STATE_SETG(0, v) +#define STATE_SETG1(v) STATE_SETG(1, v) +#define STATE_SETG2(v) STATE_SETG(2, v) +#define STATE_SETG3(v) STATE_SETG(3, v) + +#define STATE_SETFLAG(f) ((state)->c[4]) |= (f); +#define STATE_GETFLAG(f) ((state)->c[4] & (f)) +#define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f); +#define STATE_CLEARFLAGS() ((state)->c[4]) = 0; + +#define ISO2022_CONFIG ((const struct iso2022_config *)config) +#define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag)) +#define CONFIG_DESIGNATIONS (ISO2022_CONFIG->designations) /* iso2022_config.flags */ -#define NO_SHIFT 0x01 -#define USE_G2 0x02 -#define USE_JISX0208_EXT 0x04 +#define NO_SHIFT 0x01 +#define USE_G2 0x02 +#define USE_JISX0208_EXT 0x04 /*-*- internal data structures -*-*/ @@ -106,434 +106,434 @@ typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data); typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length); struct iso2022_designation { - unsigned char mark; - unsigned char plane; - unsigned char width; - iso2022_init_func initializer; - iso2022_decode_func decoder; - iso2022_encode_func encoder; + unsigned char mark; + unsigned char plane; + unsigned char width; + iso2022_init_func initializer; + iso2022_decode_func decoder; + iso2022_encode_func encoder; }; struct iso2022_config { - int flags; - const struct iso2022_designation *designations; /* non-ascii desigs */ + int flags; + const struct iso2022_designation *designations; /* non-ascii desigs */ }; /*-*- iso-2022 codec implementation -*-*/ CODEC_INIT(iso2022) { - const struct iso2022_designation *desig = CONFIG_DESIGNATIONS; - for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++) - if (desig->initializer != NULL && desig->initializer() != 0) - return -1; - return 0; + const struct iso2022_designation *desig = CONFIG_DESIGNATIONS; + for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++) + if (desig->initializer != NULL && desig->initializer() != 0) + return -1; + return 0; } ENCODER_INIT(iso2022) { - STATE_CLEARFLAGS() - STATE_SETG0(CHARSET_ASCII) - STATE_SETG1(CHARSET_ASCII) - return 0; + STATE_CLEARFLAGS() + STATE_SETG0(CHARSET_ASCII) + STATE_SETG1(CHARSET_ASCII) + return 0; } ENCODER_RESET(iso2022) { - if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) - NEXT_OUT(1) - STATE_CLEARFLAG(F_SHIFTED) - } - if (STATE_G0 != CHARSET_ASCII) { - WRITE3(ESC, '(', 'B') - NEXT_OUT(3) - STATE_SETG0(CHARSET_ASCII) - } - return 0; + if (STATE_GETFLAG(F_SHIFTED)) { + WRITE1(SI) + NEXT_OUT(1) + STATE_CLEARFLAG(F_SHIFTED) + } + if (STATE_G0 != CHARSET_ASCII) { + WRITE3(ESC, '(', 'B') + NEXT_OUT(3) + STATE_SETG0(CHARSET_ASCII) + } + return 0; } ENCODER(iso2022) { - while (inleft > 0) { - const struct iso2022_designation *dsg; - DBCHAR encoded; - ucs4_t c = **inbuf; - Py_ssize_t insize; - - if (c < 0x80) { - if (STATE_G0 != CHARSET_ASCII) { - WRITE3(ESC, '(', 'B') - STATE_SETG0(CHARSET_ASCII) - NEXT_OUT(3) - } - if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) - STATE_CLEARFLAG(F_SHIFTED) - NEXT_OUT(1) - } - WRITE1((unsigned char)c) - NEXT(1, 1) - continue; - } - - DECODE_SURROGATE(c) - insize = GET_INSIZE(c); - - encoded = MAP_UNMAPPABLE; - for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { - Py_ssize_t length = 1; - encoded = dsg->encoder(&c, &length); - if (encoded == MAP_MULTIPLE_AVAIL) { - /* this implementation won't work for pair - * of non-bmp characters. */ - if (inleft < 2) { - if (!(flags & MBENC_FLUSH)) - return MBERR_TOOFEW; - length = -1; - } - else - length = 2; + while (inleft > 0) { + const struct iso2022_designation *dsg; + DBCHAR encoded; + ucs4_t c = **inbuf; + Py_ssize_t insize; + + if (c < 0x80) { + if (STATE_G0 != CHARSET_ASCII) { + WRITE3(ESC, '(', 'B') + STATE_SETG0(CHARSET_ASCII) + NEXT_OUT(3) + } + if (STATE_GETFLAG(F_SHIFTED)) { + WRITE1(SI) + STATE_CLEARFLAG(F_SHIFTED) + NEXT_OUT(1) + } + WRITE1((unsigned char)c) + NEXT(1, 1) + continue; + } + + DECODE_SURROGATE(c) + insize = GET_INSIZE(c); + + encoded = MAP_UNMAPPABLE; + for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { + Py_ssize_t length = 1; + encoded = dsg->encoder(&c, &length); + if (encoded == MAP_MULTIPLE_AVAIL) { + /* this implementation won't work for pair + * of non-bmp characters. */ + if (inleft < 2) { + if (!(flags & MBENC_FLUSH)) + return MBERR_TOOFEW; + length = -1; + } + else + length = 2; #if Py_UNICODE_SIZE == 2 - if (length == 2) { - ucs4_t u4in[2]; - u4in[0] = (ucs4_t)IN1; - u4in[1] = (ucs4_t)IN2; - encoded = dsg->encoder(u4in, &length); - } else - encoded = dsg->encoder(&c, &length); + if (length == 2) { + ucs4_t u4in[2]; + u4in[0] = (ucs4_t)IN1; + u4in[1] = (ucs4_t)IN2; + encoded = dsg->encoder(u4in, &length); + } else + encoded = dsg->encoder(&c, &length); #else - encoded = dsg->encoder(&c, &length); + encoded = dsg->encoder(&c, &length); #endif - if (encoded != MAP_UNMAPPABLE) { - insize = length; - break; - } - } - else if (encoded != MAP_UNMAPPABLE) - break; - } - - if (!dsg->mark) - return 1; - assert(dsg->width == 1 || dsg->width == 2); - - switch (dsg->plane) { - case 0: /* G0 */ - if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) - STATE_CLEARFLAG(F_SHIFTED) - NEXT_OUT(1) - } - if (STATE_G0 != dsg->mark) { - if (dsg->width == 1) { - WRITE3(ESC, '(', ESCMARK(dsg->mark)) - STATE_SETG0(dsg->mark) - NEXT_OUT(3) - } - else if (dsg->mark == CHARSET_JISX0208) { - WRITE3(ESC, '$', ESCMARK(dsg->mark)) - STATE_SETG0(dsg->mark) - NEXT_OUT(3) - } - else { - WRITE4(ESC, '$', '(', - ESCMARK(dsg->mark)) - STATE_SETG0(dsg->mark) - NEXT_OUT(4) - } - } - break; - case 1: /* G1 */ - if (STATE_G1 != dsg->mark) { - if (dsg->width == 1) { - WRITE3(ESC, ')', ESCMARK(dsg->mark)) - STATE_SETG1(dsg->mark) - NEXT_OUT(3) - } - else { - WRITE4(ESC, '$', ')', - ESCMARK(dsg->mark)) - STATE_SETG1(dsg->mark) - NEXT_OUT(4) - } - } - if (!STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SO) - STATE_SETFLAG(F_SHIFTED) - NEXT_OUT(1) - } - break; - default: /* G2 and G3 is not supported: no encoding in - * CJKCodecs are using them yet */ - return MBERR_INTERNAL; - } - - if (dsg->width == 1) { - WRITE1((unsigned char)encoded) - NEXT_OUT(1) - } - else { - WRITE2(encoded >> 8, encoded & 0xff) - NEXT_OUT(2) - } - NEXT_IN(insize) - } - - return 0; + if (encoded != MAP_UNMAPPABLE) { + insize = length; + break; + } + } + else if (encoded != MAP_UNMAPPABLE) + break; + } + + if (!dsg->mark) + return 1; + assert(dsg->width == 1 || dsg->width == 2); + + switch (dsg->plane) { + case 0: /* G0 */ + if (STATE_GETFLAG(F_SHIFTED)) { + WRITE1(SI) + STATE_CLEARFLAG(F_SHIFTED) + NEXT_OUT(1) + } + if (STATE_G0 != dsg->mark) { + if (dsg->width == 1) { + WRITE3(ESC, '(', ESCMARK(dsg->mark)) + STATE_SETG0(dsg->mark) + NEXT_OUT(3) + } + else if (dsg->mark == CHARSET_JISX0208) { + WRITE3(ESC, '$', ESCMARK(dsg->mark)) + STATE_SETG0(dsg->mark) + NEXT_OUT(3) + } + else { + WRITE4(ESC, '$', '(', + ESCMARK(dsg->mark)) + STATE_SETG0(dsg->mark) + NEXT_OUT(4) + } + } + break; + case 1: /* G1 */ + if (STATE_G1 != dsg->mark) { + if (dsg->width == 1) { + WRITE3(ESC, ')', ESCMARK(dsg->mark)) + STATE_SETG1(dsg->mark) + NEXT_OUT(3) + } + else { + WRITE4(ESC, '$', ')', + ESCMARK(dsg->mark)) + STATE_SETG1(dsg->mark) + NEXT_OUT(4) + } + } + if (!STATE_GETFLAG(F_SHIFTED)) { + WRITE1(SO) + STATE_SETFLAG(F_SHIFTED) + NEXT_OUT(1) + } + break; + default: /* G2 and G3 is not supported: no encoding in + * CJKCodecs are using them yet */ + return MBERR_INTERNAL; + } + + if (dsg->width == 1) { + WRITE1((unsigned char)encoded) + NEXT_OUT(1) + } + else { + WRITE2(encoded >> 8, encoded & 0xff) + NEXT_OUT(2) + } + NEXT_IN(insize) + } + + return 0; } DECODER_INIT(iso2022) { - STATE_CLEARFLAGS() - STATE_SETG0(CHARSET_ASCII) - STATE_SETG1(CHARSET_ASCII) - STATE_SETG2(CHARSET_ASCII) - return 0; + STATE_CLEARFLAGS() + STATE_SETG0(CHARSET_ASCII) + STATE_SETG1(CHARSET_ASCII) + STATE_SETG2(CHARSET_ASCII) + return 0; } DECODER_RESET(iso2022) { - STATE_SETG0(CHARSET_ASCII) - STATE_CLEARFLAG(F_SHIFTED) - return 0; + STATE_SETG0(CHARSET_ASCII) + STATE_CLEARFLAG(F_SHIFTED) + return 0; } static Py_ssize_t iso2022processesc(const void *config, MultibyteCodec_State *state, - const unsigned char **inbuf, Py_ssize_t *inleft) + const unsigned char **inbuf, Py_ssize_t *inleft) { - unsigned char charset, designation; - Py_ssize_t i, esclen; - - for (i = 1;i < MAX_ESCSEQLEN;i++) { - if (i >= *inleft) - return MBERR_TOOFEW; - if (IS_ESCEND((*inbuf)[i])) { - esclen = i + 1; - break; - } - else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft && - (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') - i += 2; - } - - if (i >= MAX_ESCSEQLEN) - return 1; /* unterminated escape sequence */ - - switch (esclen) { - case 3: - if (IN2 == '$') { - charset = IN3 | CHARSET_DBCS; - designation = 0; - } - else { - charset = IN3; - if (IN2 == '(') designation = 0; - else if (IN2 == ')') designation = 1; - else if (CONFIG_ISSET(USE_G2) && IN2 == '.') - designation = 2; - else return 3; - } - break; - case 4: - if (IN2 != '$') - return 4; - - charset = IN4 | CHARSET_DBCS; - if (IN3 == '(') designation = 0; - else if (IN3 == ')') designation = 1; - else return 4; - break; - case 6: /* designation with prefix */ - if (CONFIG_ISSET(USE_JISX0208_EXT) && - (*inbuf)[3] == ESC && (*inbuf)[4] == '$' && - (*inbuf)[5] == 'B') { - charset = 'B' | CHARSET_DBCS; - designation = 0; - } - else - return 6; - break; - default: - return esclen; - } - - /* raise error when the charset is not designated for this encoding */ - if (charset != CHARSET_ASCII) { - const struct iso2022_designation *dsg; - - for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) - if (dsg->mark == charset) - break; - if (!dsg->mark) - return esclen; - } - - STATE_SETG(designation, charset) - *inleft -= esclen; - (*inbuf) += esclen; - return 0; + unsigned char charset, designation; + Py_ssize_t i, esclen; + + for (i = 1;i < MAX_ESCSEQLEN;i++) { + if (i >= *inleft) + return MBERR_TOOFEW; + if (IS_ESCEND((*inbuf)[i])) { + esclen = i + 1; + break; + } + else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft && + (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') + i += 2; + } + + if (i >= MAX_ESCSEQLEN) + return 1; /* unterminated escape sequence */ + + switch (esclen) { + case 3: + if (IN2 == '$') { + charset = IN3 | CHARSET_DBCS; + designation = 0; + } + else { + charset = IN3; + if (IN2 == '(') designation = 0; + else if (IN2 == ')') designation = 1; + else if (CONFIG_ISSET(USE_G2) && IN2 == '.') + designation = 2; + else return 3; + } + break; + case 4: + if (IN2 != '$') + return 4; + + charset = IN4 | CHARSET_DBCS; + if (IN3 == '(') designation = 0; + else if (IN3 == ')') designation = 1; + else return 4; + break; + case 6: /* designation with prefix */ + if (CONFIG_ISSET(USE_JISX0208_EXT) && + (*inbuf)[3] == ESC && (*inbuf)[4] == '$' && + (*inbuf)[5] == 'B') { + charset = 'B' | CHARSET_DBCS; + designation = 0; + } + else + return 6; + break; + default: + return esclen; + } + + /* raise error when the charset is not designated for this encoding */ + if (charset != CHARSET_ASCII) { + const struct iso2022_designation *dsg; + + for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) + if (dsg->mark == charset) + break; + if (!dsg->mark) + return esclen; + } + + STATE_SETG(designation, charset) + *inleft -= esclen; + (*inbuf) += esclen; + return 0; } -#define ISO8859_7_DECODE(c, assi) \ - if ((c) < 0xa0) (assi) = (c); \ - else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \ - (assi) = (c); \ - else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \ - (0xbffffd77L & (1L << ((c)-0xb4))))) \ - (assi) = 0x02d0 + (c); \ - else if ((c) == 0xa1) (assi) = 0x2018; \ - else if ((c) == 0xa2) (assi) = 0x2019; \ - else if ((c) == 0xaf) (assi) = 0x2015; +#define ISO8859_7_DECODE(c, assi) \ + if ((c) < 0xa0) (assi) = (c); \ + else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \ + (assi) = (c); \ + else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \ + (0xbffffd77L & (1L << ((c)-0xb4))))) \ + (assi) = 0x02d0 + (c); \ + else if ((c) == 0xa1) (assi) = 0x2018; \ + else if ((c) == 0xa2) (assi) = 0x2019; \ + else if ((c) == 0xaf) (assi) = 0x2015; static Py_ssize_t iso2022processg2(const void *config, MultibyteCodec_State *state, - const unsigned char **inbuf, Py_ssize_t *inleft, - Py_UNICODE **outbuf, Py_ssize_t *outleft) + const unsigned char **inbuf, Py_ssize_t *inleft, + Py_UNICODE **outbuf, Py_ssize_t *outleft) { - /* not written to use encoder, decoder functions because only few - * encodings use G2 designations in CJKCodecs */ - if (STATE_G2 == CHARSET_ISO8859_1) { - if (IN3 < 0x80) - OUT1(IN3 + 0x80) - else - return 3; - } - else if (STATE_G2 == CHARSET_ISO8859_7) { - ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf) - else return 3; - } - else if (STATE_G2 == CHARSET_ASCII) { - if (IN3 & 0x80) return 3; - else **outbuf = IN3; - } - else - return MBERR_INTERNAL; - - (*inbuf) += 3; - *inleft -= 3; - (*outbuf) += 1; - *outleft -= 1; - return 0; + /* not written to use encoder, decoder functions because only few + * encodings use G2 designations in CJKCodecs */ + if (STATE_G2 == CHARSET_ISO8859_1) { + if (IN3 < 0x80) + OUT1(IN3 + 0x80) + else + return 3; + } + else if (STATE_G2 == CHARSET_ISO8859_7) { + ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf) + else return 3; + } + else if (STATE_G2 == CHARSET_ASCII) { + if (IN3 & 0x80) return 3; + else **outbuf = IN3; + } + else + return MBERR_INTERNAL; + + (*inbuf) += 3; + *inleft -= 3; + (*outbuf) += 1; + *outleft -= 1; + return 0; } DECODER(iso2022) { - const struct iso2022_designation *dsgcache = NULL; - - while (inleft > 0) { - unsigned char c = IN1; - Py_ssize_t err; - - if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { - /* ESC throughout mode: - * for non-iso2022 escape sequences */ - WRITE1(c) /* assume as ISO-8859-1 */ - NEXT(1, 1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(F_ESCTHROUGHOUT) - } - continue; - } - - switch (c) { - case ESC: - REQUIRE_INBUF(2) - if (IS_ISO2022ESC(IN2)) { - err = iso2022processesc(config, state, - inbuf, &inleft); - if (err != 0) - return err; - } - else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */ - REQUIRE_INBUF(3) - err = iso2022processg2(config, state, - inbuf, &inleft, outbuf, &outleft); - if (err != 0) - return err; - } - else { - WRITE1(ESC) - STATE_SETFLAG(F_ESCTHROUGHOUT) - NEXT(1, 1) - } - break; - case SI: - if (CONFIG_ISSET(NO_SHIFT)) - goto bypass; - STATE_CLEARFLAG(F_SHIFTED) - NEXT_IN(1) - break; - case SO: - if (CONFIG_ISSET(NO_SHIFT)) - goto bypass; - STATE_SETFLAG(F_SHIFTED) - NEXT_IN(1) - break; - case LF: - STATE_CLEARFLAG(F_SHIFTED) - WRITE1(LF) - NEXT(1, 1) - break; - default: - if (c < 0x20) /* C0 */ - goto bypass; - else if (c >= 0x80) - return 1; - else { - const struct iso2022_designation *dsg; - unsigned char charset; - ucs4_t decoded; - - if (STATE_GETFLAG(F_SHIFTED)) - charset = STATE_G1; - else - charset = STATE_G0; - - if (charset == CHARSET_ASCII) { -bypass: WRITE1(c) - NEXT(1, 1) - break; - } - - if (dsgcache != NULL && - dsgcache->mark == charset) - dsg = dsgcache; - else { - for (dsg = CONFIG_DESIGNATIONS; - dsg->mark != charset + const struct iso2022_designation *dsgcache = NULL; + + while (inleft > 0) { + unsigned char c = IN1; + Py_ssize_t err; + + if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { + /* ESC throughout mode: + * for non-iso2022 escape sequences */ + WRITE1(c) /* assume as ISO-8859-1 */ + NEXT(1, 1) + if (IS_ESCEND(c)) { + STATE_CLEARFLAG(F_ESCTHROUGHOUT) + } + continue; + } + + switch (c) { + case ESC: + REQUIRE_INBUF(2) + if (IS_ISO2022ESC(IN2)) { + err = iso2022processesc(config, state, + inbuf, &inleft); + if (err != 0) + return err; + } + else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */ + REQUIRE_INBUF(3) + err = iso2022processg2(config, state, + inbuf, &inleft, outbuf, &outleft); + if (err != 0) + return err; + } + else { + WRITE1(ESC) + STATE_SETFLAG(F_ESCTHROUGHOUT) + NEXT(1, 1) + } + break; + case SI: + if (CONFIG_ISSET(NO_SHIFT)) + goto bypass; + STATE_CLEARFLAG(F_SHIFTED) + NEXT_IN(1) + break; + case SO: + if (CONFIG_ISSET(NO_SHIFT)) + goto bypass; + STATE_SETFLAG(F_SHIFTED) + NEXT_IN(1) + break; + case LF: + STATE_CLEARFLAG(F_SHIFTED) + WRITE1(LF) + NEXT(1, 1) + break; + default: + if (c < 0x20) /* C0 */ + goto bypass; + else if (c >= 0x80) + return 1; + else { + const struct iso2022_designation *dsg; + unsigned char charset; + ucs4_t decoded; + + if (STATE_GETFLAG(F_SHIFTED)) + charset = STATE_G1; + else + charset = STATE_G0; + + if (charset == CHARSET_ASCII) { +bypass: WRITE1(c) + NEXT(1, 1) + break; + } + + if (dsgcache != NULL && + dsgcache->mark == charset) + dsg = dsgcache; + else { + for (dsg = CONFIG_DESIGNATIONS; + dsg->mark != charset #ifdef Py_DEBUG - && dsg->mark != '\0' + && dsg->mark != '\0' #endif - ;dsg++) - /* noop */; - assert(dsg->mark != '\0'); - dsgcache = dsg; - } - - REQUIRE_INBUF(dsg->width) - decoded = dsg->decoder(*inbuf); - if (decoded == MAP_UNMAPPABLE) - return dsg->width; - - if (decoded < 0x10000) { - WRITE1(decoded) - NEXT_OUT(1) - } - else if (decoded < 0x30000) { - WRITEUCS4(decoded) - } - else { /* JIS X 0213 pairs */ - WRITE2(decoded >> 16, decoded & 0xffff) - NEXT_OUT(2) - } - NEXT_IN(dsg->width) - } - break; - } - } - return 0; + ;dsg++) + /* noop */; + assert(dsg->mark != '\0'); + dsgcache = dsg; + } + + REQUIRE_INBUF(dsg->width) + decoded = dsg->decoder(*inbuf); + if (decoded == MAP_UNMAPPABLE) + return dsg->width; + + if (decoded < 0x10000) { + WRITE1(decoded) + NEXT_OUT(1) + } + else if (decoded < 0x30000) { + WRITEUCS4(decoded) + } + else { /* JIS X 0213 pairs */ + WRITE2(decoded >> 16, decoded & 0xffff) + NEXT_OUT(2) + } + NEXT_IN(dsg->width) + } + break; + } + } + return 0; } /*-*- mapping table holders -*-*/ @@ -567,542 +567,542 @@ DECMAP(gb2312) static int ksx1001_init(void) { - static int initialized = 0; - - if (!initialized && ( - IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) || - IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap))) - return -1; - initialized = 1; - return 0; + static int initialized = 0; + + if (!initialized && ( + IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) || + IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap))) + return -1; + initialized = 1; + return 0; } static ucs4_t ksx1001_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(ksx1001, u, data[0], data[1]) - return u; - else - return MAP_UNMAPPABLE; + ucs4_t u; + TRYMAP_DEC(ksx1001, u, data[0], data[1]) + return u; + else + return MAP_UNMAPPABLE; } static DBCHAR ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded; - assert(*length == 1); - if (*data < 0x10000) { - TRYMAP_ENC(cp949, coded, *data) - if (!(coded & 0x8000)) - return coded; - } - return MAP_UNMAPPABLE; + DBCHAR coded; + assert(*length == 1); + if (*data < 0x10000) { + TRYMAP_ENC(cp949, coded, *data) + if (!(coded & 0x8000)) + return coded; + } + return MAP_UNMAPPABLE; } static int jisx0208_init(void) { - static int initialized = 0; - - if (!initialized && ( - IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) || - IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap))) - return -1; - initialized = 1; - return 0; + static int initialized = 0; + + if (!initialized && ( + IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) || + IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap))) + return -1; + initialized = 1; + return 0; } static ucs4_t jisx0208_decoder(const unsigned char *data) { - ucs4_t u; - if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ - return 0xff3c; - else TRYMAP_DEC(jisx0208, u, data[0], data[1]) - return u; - else - return MAP_UNMAPPABLE; + ucs4_t u; + if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ + return 0xff3c; + else TRYMAP_DEC(jisx0208, u, data[0], data[1]) + return u; + else + return MAP_UNMAPPABLE; } static DBCHAR jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded; - assert(*length == 1); - if (*data < 0x10000) { - if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */ - return 0x2140; - else TRYMAP_ENC(jisxcommon, coded, *data) { - if (!(coded & 0x8000)) - return coded; - } - } - return MAP_UNMAPPABLE; + DBCHAR coded; + assert(*length == 1); + if (*data < 0x10000) { + if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */ + return 0x2140; + else TRYMAP_ENC(jisxcommon, coded, *data) { + if (!(coded & 0x8000)) + return coded; + } + } + return MAP_UNMAPPABLE; } static int jisx0212_init(void) { - static int initialized = 0; - - if (!initialized && ( - IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) || - IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap))) - return -1; - initialized = 1; - return 0; + static int initialized = 0; + + if (!initialized && ( + IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) || + IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap))) + return -1; + initialized = 1; + return 0; } static ucs4_t jisx0212_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(jisx0212, u, data[0], data[1]) - return u; - else - return MAP_UNMAPPABLE; + ucs4_t u; + TRYMAP_DEC(jisx0212, u, data[0], data[1]) + return u; + else + return MAP_UNMAPPABLE; } static DBCHAR jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded; - assert(*length == 1); - if (*data < 0x10000) { - TRYMAP_ENC(jisxcommon, coded, *data) { - if (coded & 0x8000) - return coded & 0x7fff; - } - } - return MAP_UNMAPPABLE; + DBCHAR coded; + assert(*length == 1); + if (*data < 0x10000) { + TRYMAP_ENC(jisxcommon, coded, *data) { + if (coded & 0x8000) + return coded & 0x7fff; + } + } + return MAP_UNMAPPABLE; } static int jisx0213_init(void) { - static int initialized = 0; - - if (!initialized && ( - jisx0208_init() || - IMPORT_MAP(jp, jisx0213_bmp, - &jisx0213_bmp_encmap, NULL) || - IMPORT_MAP(jp, jisx0213_1_bmp, - NULL, &jisx0213_1_bmp_decmap) || - IMPORT_MAP(jp, jisx0213_2_bmp, - NULL, &jisx0213_2_bmp_decmap) || - IMPORT_MAP(jp, jisx0213_emp, - &jisx0213_emp_encmap, NULL) || - IMPORT_MAP(jp, jisx0213_1_emp, - NULL, &jisx0213_1_emp_decmap) || - IMPORT_MAP(jp, jisx0213_2_emp, - NULL, &jisx0213_2_emp_decmap) || - IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap, - &jisx0213_pair_decmap))) - return -1; - initialized = 1; - return 0; + static int initialized = 0; + + if (!initialized && ( + jisx0208_init() || + IMPORT_MAP(jp, jisx0213_bmp, + &jisx0213_bmp_encmap, NULL) || + IMPORT_MAP(jp, jisx0213_1_bmp, + NULL, &jisx0213_1_bmp_decmap) || + IMPORT_MAP(jp, jisx0213_2_bmp, + NULL, &jisx0213_2_bmp_decmap) || + IMPORT_MAP(jp, jisx0213_emp, + &jisx0213_emp_encmap, NULL) || + IMPORT_MAP(jp, jisx0213_1_emp, + NULL, &jisx0213_1_emp_decmap) || + IMPORT_MAP(jp, jisx0213_2_emp, + NULL, &jisx0213_2_emp_decmap) || + IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap, + &jisx0213_pair_decmap))) + return -1; + initialized = 1; + return 0; } #define config ((void *)2000) static ucs4_t jisx0213_2000_1_decoder(const unsigned char *data) { - ucs4_t u; - EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1]) - else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ - return 0xff3c; - else TRYMAP_DEC(jisx0208, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) - u |= 0x20000; - else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); - else - return MAP_UNMAPPABLE; - return u; + ucs4_t u; + EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1]) + else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ + return 0xff3c; + else TRYMAP_DEC(jisx0208, u, data[0], data[1]); + else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); + else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) + u |= 0x20000; + else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); + else + return MAP_UNMAPPABLE; + return u; } static ucs4_t jisx0213_2000_2_decoder(const unsigned char *data) { - ucs4_t u; - EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1]) - TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) - u |= 0x20000; - else - return MAP_UNMAPPABLE; - return u; + ucs4_t u; + EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1]) + TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); + else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) + u |= 0x20000; + else + return MAP_UNMAPPABLE; + return u; } #undef config static ucs4_t jisx0213_2004_1_decoder(const unsigned char *data) { - ucs4_t u; - if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ - return 0xff3c; - else TRYMAP_DEC(jisx0208, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) - u |= 0x20000; - else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); - else - return MAP_UNMAPPABLE; - return u; + ucs4_t u; + if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ + return 0xff3c; + else TRYMAP_DEC(jisx0208, u, data[0], data[1]); + else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); + else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) + u |= 0x20000; + else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); + else + return MAP_UNMAPPABLE; + return u; } static ucs4_t jisx0213_2004_2_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) - u |= 0x20000; - else - return MAP_UNMAPPABLE; - return u; + ucs4_t u; + TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); + else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) + u |= 0x20000; + else + return MAP_UNMAPPABLE; + return u; } static DBCHAR jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) { - DBCHAR coded; - - switch (*length) { - case 1: /* first character */ - if (*data >= 0x10000) { - if ((*data) >> 16 == 0x20000 >> 16) { - EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data) - else TRYMAP_ENC(jisx0213_emp, coded, - (*data) & 0xffff) - return coded; - } - return MAP_UNMAPPABLE; - } - - EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data) - else TRYMAP_ENC(jisx0213_bmp, coded, *data) { - if (coded == MULTIC) - return MAP_MULTIPLE_AVAIL; - } - else TRYMAP_ENC(jisxcommon, coded, *data) { - if (coded & 0x8000) - return MAP_UNMAPPABLE; - } - else - return MAP_UNMAPPABLE; - return coded; - case 2: /* second character of unicode pair */ - coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], - jisx0213_pair_encmap, JISX0213_ENCPAIRS); - if (coded == DBCINV) { - *length = 1; - coded = find_pairencmap((ucs2_t)data[0], 0, - jisx0213_pair_encmap, JISX0213_ENCPAIRS); - if (coded == DBCINV) - return MAP_UNMAPPABLE; - } - else - return coded; - case -1: /* flush unterminated */ - *length = 1; - coded = find_pairencmap((ucs2_t)data[0], 0, - jisx0213_pair_encmap, JISX0213_ENCPAIRS); - if (coded == DBCINV) - return MAP_UNMAPPABLE; - else - return coded; - default: - return MAP_UNMAPPABLE; - } + DBCHAR coded; + + switch (*length) { + case 1: /* first character */ + if (*data >= 0x10000) { + if ((*data) >> 16 == 0x20000 >> 16) { + EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data) + else TRYMAP_ENC(jisx0213_emp, coded, + (*data) & 0xffff) + return coded; + } + return MAP_UNMAPPABLE; + } + + EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data) + else TRYMAP_ENC(jisx0213_bmp, coded, *data) { + if (coded == MULTIC) + return MAP_MULTIPLE_AVAIL; + } + else TRYMAP_ENC(jisxcommon, coded, *data) { + if (coded & 0x8000) + return MAP_UNMAPPABLE; + } + else + return MAP_UNMAPPABLE; + return coded; + case 2: /* second character of unicode pair */ + coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], + jisx0213_pair_encmap, JISX0213_ENCPAIRS); + if (coded == DBCINV) { + *length = 1; + coded = find_pairencmap((ucs2_t)data[0], 0, + jisx0213_pair_encmap, JISX0213_ENCPAIRS); + if (coded == DBCINV) + return MAP_UNMAPPABLE; + } + else + return coded; + case -1: /* flush unterminated */ + *length = 1; + coded = find_pairencmap((ucs2_t)data[0], 0, + jisx0213_pair_encmap, JISX0213_ENCPAIRS); + if (coded == DBCINV) + return MAP_UNMAPPABLE; + else + return coded; + default: + return MAP_UNMAPPABLE; + } } static DBCHAR jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); - if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) - return coded; - else if (coded & 0x8000) - return MAP_UNMAPPABLE; - else - return coded; + DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); + if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) + return coded; + else if (coded & 0x8000) + return MAP_UNMAPPABLE; + else + return coded; } static DBCHAR jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded; - Py_ssize_t ilength = *length; - - coded = jisx0213_encoder(data, length, (void *)2000); - switch (ilength) { - case 1: - if (coded == MAP_MULTIPLE_AVAIL) - return MAP_MULTIPLE_AVAIL; - else - return MAP_UNMAPPABLE; - case 2: - if (*length != 2) - return MAP_UNMAPPABLE; - else - return coded; - default: - return MAP_UNMAPPABLE; - } + DBCHAR coded; + Py_ssize_t ilength = *length; + + coded = jisx0213_encoder(data, length, (void *)2000); + switch (ilength) { + case 1: + if (coded == MAP_MULTIPLE_AVAIL) + return MAP_MULTIPLE_AVAIL; + else + return MAP_UNMAPPABLE; + case 2: + if (*length != 2) + return MAP_UNMAPPABLE; + else + return coded; + default: + return MAP_UNMAPPABLE; + } } static DBCHAR jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); - if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) - return coded; - else if (coded & 0x8000) - return coded & 0x7fff; - else - return MAP_UNMAPPABLE; + DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); + if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) + return coded; + else if (coded & 0x8000) + return coded & 0x7fff; + else + return MAP_UNMAPPABLE; } static DBCHAR jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded = jisx0213_encoder(data, length, NULL); - if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) - return coded; - else if (coded & 0x8000) - return MAP_UNMAPPABLE; - else - return coded; + DBCHAR coded = jisx0213_encoder(data, length, NULL); + if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) + return coded; + else if (coded & 0x8000) + return MAP_UNMAPPABLE; + else + return coded; } static DBCHAR jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded; - Py_ssize_t ilength = *length; - - coded = jisx0213_encoder(data, length, NULL); - switch (ilength) { - case 1: - if (coded == MAP_MULTIPLE_AVAIL) - return MAP_MULTIPLE_AVAIL; - else - return MAP_UNMAPPABLE; - case 2: - if (*length != 2) - return MAP_UNMAPPABLE; - else - return coded; - default: - return MAP_UNMAPPABLE; - } + DBCHAR coded; + Py_ssize_t ilength = *length; + + coded = jisx0213_encoder(data, length, NULL); + switch (ilength) { + case 1: + if (coded == MAP_MULTIPLE_AVAIL) + return MAP_MULTIPLE_AVAIL; + else + return MAP_UNMAPPABLE; + case 2: + if (*length != 2) + return MAP_UNMAPPABLE; + else + return coded; + default: + return MAP_UNMAPPABLE; + } } static DBCHAR jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded = jisx0213_encoder(data, length, NULL); - if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) - return coded; - else if (coded & 0x8000) - return coded & 0x7fff; - else - return MAP_UNMAPPABLE; + DBCHAR coded = jisx0213_encoder(data, length, NULL); + if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) + return coded; + else if (coded & 0x8000) + return coded & 0x7fff; + else + return MAP_UNMAPPABLE; } static ucs4_t jisx0201_r_decoder(const unsigned char *data) { - ucs4_t u; - JISX0201_R_DECODE(*data, u) - else return MAP_UNMAPPABLE; - return u; + ucs4_t u; + JISX0201_R_DECODE(*data, u) + else return MAP_UNMAPPABLE; + return u; } static DBCHAR jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded; - JISX0201_R_ENCODE(*data, coded) - else return MAP_UNMAPPABLE; - return coded; + DBCHAR coded; + JISX0201_R_ENCODE(*data, coded) + else return MAP_UNMAPPABLE; + return coded; } static ucs4_t jisx0201_k_decoder(const unsigned char *data) { - ucs4_t u; - JISX0201_K_DECODE(*data ^ 0x80, u) - else return MAP_UNMAPPABLE; - return u; + ucs4_t u; + JISX0201_K_DECODE(*data ^ 0x80, u) + else return MAP_UNMAPPABLE; + return u; } static DBCHAR jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded; - JISX0201_K_ENCODE(*data, coded) - else return MAP_UNMAPPABLE; - return coded - 0x80; + DBCHAR coded; + JISX0201_K_ENCODE(*data, coded) + else return MAP_UNMAPPABLE; + return coded - 0x80; } static int gb2312_init(void) { - static int initialized = 0; - - if (!initialized && ( - IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) || - IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap))) - return -1; - initialized = 1; - return 0; + static int initialized = 0; + + if (!initialized && ( + IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) || + IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap))) + return -1; + initialized = 1; + return 0; } static ucs4_t gb2312_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(gb2312, u, data[0], data[1]) - return u; - else - return MAP_UNMAPPABLE; + ucs4_t u; + TRYMAP_DEC(gb2312, u, data[0], data[1]) + return u; + else + return MAP_UNMAPPABLE; } static DBCHAR gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) { - DBCHAR coded; - assert(*length == 1); - if (*data < 0x10000) { - TRYMAP_ENC(gbcommon, coded, *data) { - if (!(coded & 0x8000)) - return coded; - } - } - return MAP_UNMAPPABLE; + DBCHAR coded; + assert(*length == 1); + if (*data < 0x10000) { + TRYMAP_ENC(gbcommon, coded, *data) { + if (!(coded & 0x8000)) + return coded; + } + } + return MAP_UNMAPPABLE; } static ucs4_t dummy_decoder(const unsigned char *data) { - return MAP_UNMAPPABLE; + return MAP_UNMAPPABLE; } static DBCHAR dummy_encoder(const ucs4_t *data, Py_ssize_t *length) { - return MAP_UNMAPPABLE; + return MAP_UNMAPPABLE; } /*-*- registry tables -*-*/ -#define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2, \ - ksx1001_init, \ - ksx1001_decoder, ksx1001_encoder } -#define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2, \ - ksx1001_init, \ - ksx1001_decoder, ksx1001_encoder } -#define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1, \ - NULL, \ - jisx0201_r_decoder, jisx0201_r_encoder } -#define REGISTRY_JISX0201_K { CHARSET_JISX0201_K, 0, 1, \ - NULL, \ - jisx0201_k_decoder, jisx0201_k_encoder } -#define REGISTRY_JISX0208 { CHARSET_JISX0208, 0, 2, \ - jisx0208_init, \ - jisx0208_decoder, jisx0208_encoder } -#define REGISTRY_JISX0208_O { CHARSET_JISX0208_O, 0, 2, \ - jisx0208_init, \ - jisx0208_decoder, jisx0208_encoder } -#define REGISTRY_JISX0212 { CHARSET_JISX0212, 0, 2, \ - jisx0212_init, \ - jisx0212_decoder, jisx0212_encoder } -#define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2, \ - jisx0213_init, \ - jisx0213_2000_1_decoder, \ - jisx0213_2000_1_encoder } +#define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2, \ + ksx1001_init, \ + ksx1001_decoder, ksx1001_encoder } +#define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2, \ + ksx1001_init, \ + ksx1001_decoder, ksx1001_encoder } +#define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1, \ + NULL, \ + jisx0201_r_decoder, jisx0201_r_encoder } +#define REGISTRY_JISX0201_K { CHARSET_JISX0201_K, 0, 1, \ + NULL, \ + jisx0201_k_decoder, jisx0201_k_encoder } +#define REGISTRY_JISX0208 { CHARSET_JISX0208, 0, 2, \ + jisx0208_init, \ + jisx0208_decoder, jisx0208_encoder } +#define REGISTRY_JISX0208_O { CHARSET_JISX0208_O, 0, 2, \ + jisx0208_init, \ + jisx0208_decoder, jisx0208_encoder } +#define REGISTRY_JISX0212 { CHARSET_JISX0212, 0, 2, \ + jisx0212_init, \ + jisx0212_decoder, jisx0212_encoder } +#define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2, \ + jisx0213_init, \ + jisx0213_2000_1_decoder, \ + jisx0213_2000_1_encoder } #define REGISTRY_JISX0213_2000_1_PAIRONLY { CHARSET_JISX0213_2000_1, 0, 2, \ - jisx0213_init, \ - jisx0213_2000_1_decoder, \ - jisx0213_2000_1_encoder_paironly } -#define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2, \ - jisx0213_init, \ - jisx0213_2000_2_decoder, \ - jisx0213_2000_2_encoder } -#define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2, \ - jisx0213_init, \ - jisx0213_2004_1_decoder, \ - jisx0213_2004_1_encoder } + jisx0213_init, \ + jisx0213_2000_1_decoder, \ + jisx0213_2000_1_encoder_paironly } +#define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2, \ + jisx0213_init, \ + jisx0213_2000_2_decoder, \ + jisx0213_2000_2_encoder } +#define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2, \ + jisx0213_init, \ + jisx0213_2004_1_decoder, \ + jisx0213_2004_1_encoder } #define REGISTRY_JISX0213_2004_1_PAIRONLY { CHARSET_JISX0213_2004_1, 0, 2, \ - jisx0213_init, \ - jisx0213_2004_1_decoder, \ - jisx0213_2004_1_encoder_paironly } -#define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2, \ - jisx0213_init, \ - jisx0213_2004_2_decoder, \ - jisx0213_2004_2_encoder } -#define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2, \ - gb2312_init, \ - gb2312_decoder, gb2312_encoder } -#define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2, \ - cns11643_init, \ - cns11643_1_decoder, cns11643_1_encoder } -#define REGISTRY_CNS11643_2 { CHARSET_CNS11643_2, 2, 2, \ - cns11643_init, \ - cns11643_2_decoder, cns11643_2_encoder } -#define REGISTRY_ISO8859_1 { CHARSET_ISO8859_1, 2, 1, \ - NULL, dummy_decoder, dummy_encoder } -#define REGISTRY_ISO8859_7 { CHARSET_ISO8859_7, 2, 1, \ - NULL, dummy_decoder, dummy_encoder } -#define REGISTRY_SENTINEL { 0, } -#define CONFIGDEF(var, attrs) \ - static const struct iso2022_config iso2022_##var##_config = { \ - attrs, iso2022_##var##_designations \ - }; + jisx0213_init, \ + jisx0213_2004_1_decoder, \ + jisx0213_2004_1_encoder_paironly } +#define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2, \ + jisx0213_init, \ + jisx0213_2004_2_decoder, \ + jisx0213_2004_2_encoder } +#define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2, \ + gb2312_init, \ + gb2312_decoder, gb2312_encoder } +#define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2, \ + cns11643_init, \ + cns11643_1_decoder, cns11643_1_encoder } +#define REGISTRY_CNS11643_2 { CHARSET_CNS11643_2, 2, 2, \ + cns11643_init, \ + cns11643_2_decoder, cns11643_2_encoder } +#define REGISTRY_ISO8859_1 { CHARSET_ISO8859_1, 2, 1, \ + NULL, dummy_decoder, dummy_encoder } +#define REGISTRY_ISO8859_7 { CHARSET_ISO8859_7, 2, 1, \ + NULL, dummy_decoder, dummy_encoder } +#define REGISTRY_SENTINEL { 0, } +#define CONFIGDEF(var, attrs) \ + static const struct iso2022_config iso2022_##var##_config = { \ + attrs, iso2022_##var##_designations \ + }; static const struct iso2022_designation iso2022_kr_designations[] = { - REGISTRY_KSX1001_G1, REGISTRY_SENTINEL + REGISTRY_KSX1001_G1, REGISTRY_SENTINEL }; CONFIGDEF(kr, 0) static const struct iso2022_designation iso2022_jp_designations[] = { - REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O, - REGISTRY_SENTINEL + REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O, + REGISTRY_SENTINEL }; CONFIGDEF(jp, NO_SHIFT | USE_JISX0208_EXT) static const struct iso2022_designation iso2022_jp_1_designations[] = { - REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R, - REGISTRY_JISX0208_O, REGISTRY_SENTINEL + REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R, + REGISTRY_JISX0208_O, REGISTRY_SENTINEL }; CONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT) static const struct iso2022_designation iso2022_jp_2_designations[] = { - REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0, - REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O, - REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL + REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0, + REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O, + REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL }; CONFIGDEF(jp_2, NO_SHIFT | USE_G2 | USE_JISX0208_EXT) static const struct iso2022_designation iso2022_jp_2004_designations[] = { - REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208, - REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL + REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208, + REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL }; CONFIGDEF(jp_2004, NO_SHIFT | USE_JISX0208_EXT) static const struct iso2022_designation iso2022_jp_3_designations[] = { - REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208, - REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL + REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208, + REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL }; CONFIGDEF(jp_3, NO_SHIFT | USE_JISX0208_EXT) static const struct iso2022_designation iso2022_jp_ext_designations[] = { - REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R, - REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL + REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R, + REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL }; CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT) @@ -1111,11 +1111,11 @@ BEGIN_MAPPINGS_LIST /* no mapping table here */ END_MAPPINGS_LIST -#define ISO2022_CODEC(variation) { \ - "iso2022_" #variation, \ - &iso2022_##variation##_config, \ - iso2022_codec_init, \ - _STATEFUL_METHODS(iso2022) \ +#define ISO2022_CODEC(variation) { \ + "iso2022_" #variation, \ + &iso2022_##variation##_config, \ + iso2022_codec_init, \ + _STATEFUL_METHODS(iso2022) \ }, BEGIN_CODECS_LIST diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index f49a10b..901d3be 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -19,124 +19,124 @@ ENCODER(cp932) { - while (inleft > 0) { - Py_UNICODE c = IN1; - DBCHAR code; - unsigned char c1, c2; - - if (c <= 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) - continue; - } - else if (c >= 0xff61 && c <= 0xff9f) { - WRITE1(c - 0xfec0) - NEXT(1, 1) - continue; - } - else if (c >= 0xf8f0 && c <= 0xf8f3) { - /* Windows compatibility */ - REQUIRE_OUTBUF(1) - if (c == 0xf8f0) - OUT1(0xa0) - else - OUT1(c - 0xfef1 + 0xfd) - NEXT(1, 1) - continue; - } - - UCS4INVALID(c) - REQUIRE_OUTBUF(2) - - TRYMAP_ENC(cp932ext, code, c) { - OUT1(code >> 8) - OUT2(code & 0xff) - } - else TRYMAP_ENC(jisxcommon, code, c) { - if (code & 0x8000) /* MSB set: JIS X 0212 */ - return 1; - - /* JIS X 0208 */ - c1 = code >> 8; - c2 = code & 0xff; - c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); - c1 = (c1 - 0x21) >> 1; - OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) - OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) - } - else if (c >= 0xe000 && c < 0xe758) { - /* User-defined area */ - c1 = (Py_UNICODE)(c - 0xe000) / 188; - c2 = (Py_UNICODE)(c - 0xe000) % 188; - OUT1(c1 + 0xf0) - OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) - } - else - return 1; - - NEXT(1, 2) - } - - return 0; + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + unsigned char c1, c2; + + if (c <= 0x80) { + WRITE1((unsigned char)c) + NEXT(1, 1) + continue; + } + else if (c >= 0xff61 && c <= 0xff9f) { + WRITE1(c - 0xfec0) + NEXT(1, 1) + continue; + } + else if (c >= 0xf8f0 && c <= 0xf8f3) { + /* Windows compatibility */ + REQUIRE_OUTBUF(1) + if (c == 0xf8f0) + OUT1(0xa0) + else + OUT1(c - 0xfef1 + 0xfd) + NEXT(1, 1) + continue; + } + + UCS4INVALID(c) + REQUIRE_OUTBUF(2) + + TRYMAP_ENC(cp932ext, code, c) { + OUT1(code >> 8) + OUT2(code & 0xff) + } + else TRYMAP_ENC(jisxcommon, code, c) { + if (code & 0x8000) /* MSB set: JIS X 0212 */ + return 1; + + /* JIS X 0208 */ + c1 = code >> 8; + c2 = code & 0xff; + c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); + c1 = (c1 - 0x21) >> 1; + OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) + OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) + } + else if (c >= 0xe000 && c < 0xe758) { + /* User-defined area */ + c1 = (Py_UNICODE)(c - 0xe000) / 188; + c2 = (Py_UNICODE)(c - 0xe000) % 188; + OUT1(c1 + 0xf0) + OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) + } + else + return 1; + + NEXT(1, 2) + } + + return 0; } DECODER(cp932) { - while (inleft > 0) { - unsigned char c = IN1, c2; - - REQUIRE_OUTBUF(1) - if (c <= 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } - else if (c >= 0xa0 && c <= 0xdf) { - if (c == 0xa0) - OUT1(0xf8f0) /* half-width katakana */ - else - OUT1(0xfec0 + c) - NEXT(1, 1) - continue; - } - else if (c >= 0xfd/* && c <= 0xff*/) { - /* Windows compatibility */ - OUT1(0xf8f1 - 0xfd + c) - NEXT(1, 1) - continue; - } - - REQUIRE_INBUF(2) - c2 = IN2; - - TRYMAP_DEC(cp932ext, **outbuf, c, c2); - else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ - if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; - - c = (c < 0xe0 ? c - 0x81 : c - 0xc1); - c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); - c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21); - c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; - - TRYMAP_DEC(jisx0208, **outbuf, c, c2); - else return 2; - } - else if (c >= 0xf0 && c <= 0xf9) { - if ((c2 >= 0x40 && c2 <= 0x7e) || - (c2 >= 0x80 && c2 <= 0xfc)) - OUT1(0xe000 + 188 * (c - 0xf0) + - (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) - else - return 2; - } - else - return 2; - - NEXT(2, 1) - } - - return 0; + while (inleft > 0) { + unsigned char c = IN1, c2; + + REQUIRE_OUTBUF(1) + if (c <= 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } + else if (c >= 0xa0 && c <= 0xdf) { + if (c == 0xa0) + OUT1(0xf8f0) /* half-width katakana */ + else + OUT1(0xfec0 + c) + NEXT(1, 1) + continue; + } + else if (c >= 0xfd/* && c <= 0xff*/) { + /* Windows compatibility */ + OUT1(0xf8f1 - 0xfd + c) + NEXT(1, 1) + continue; + } + + REQUIRE_INBUF(2) + c2 = IN2; + + TRYMAP_DEC(cp932ext, **outbuf, c, c2); + else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ + if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) + return 2; + + c = (c < 0xe0 ? c - 0x81 : c - 0xc1); + c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); + c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21); + c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; + + TRYMAP_DEC(jisx0208, **outbuf, c, c2); + else return 2; + } + else if (c >= 0xf0 && c <= 0xf9) { + if ((c2 >= 0x40 && c2 <= 0x7e) || + (c2 >= 0x80 && c2 <= 0xfc)) + OUT1(0xe000 + 188 * (c - 0xf0) + + (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) + else + return 2; + } + else + return 2; + + NEXT(2, 1) + } + + return 0; } @@ -146,166 +146,166 @@ DECODER(cp932) ENCODER(euc_jis_2004) { - while (inleft > 0) { - ucs4_t c = IN1; - DBCHAR code; - Py_ssize_t insize; - - if (c < 0x80) { - WRITE1(c) - NEXT(1, 1) - continue; - } - - DECODE_SURROGATE(c) - insize = GET_INSIZE(c); - - if (c <= 0xFFFF) { - EMULATE_JISX0213_2000_ENCODE_BMP(code, c) - else TRYMAP_ENC(jisx0213_bmp, code, c) { - if (code == MULTIC) { - if (inleft < 2) { - if (flags & MBENC_FLUSH) { - code = find_pairencmap( - (ucs2_t)c, 0, - jisx0213_pair_encmap, - JISX0213_ENCPAIRS); - if (code == DBCINV) - return 1; - } - else - return MBERR_TOOFEW; - } - else { - code = find_pairencmap( - (ucs2_t)c, (*inbuf)[1], - jisx0213_pair_encmap, - JISX0213_ENCPAIRS); - if (code == DBCINV) { - code = find_pairencmap( - (ucs2_t)c, 0, - jisx0213_pair_encmap, - JISX0213_ENCPAIRS); - if (code == DBCINV) - return 1; - } else - insize = 2; - } - } - } - else TRYMAP_ENC(jisxcommon, code, c); - else if (c >= 0xff61 && c <= 0xff9f) { - /* JIS X 0201 half-width katakana */ - WRITE2(0x8e, c - 0xfec0) - NEXT(1, 2) - continue; - } - else if (c == 0xff3c) - /* F/W REVERSE SOLIDUS (see NOTES) */ - code = 0x2140; - else if (c == 0xff5e) - /* F/W TILDE (see NOTES) */ - code = 0x2232; - else - return 1; - } - else if (c >> 16 == EMPBASE >> 16) { - EMULATE_JISX0213_2000_ENCODE_EMP(code, c) - else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff); - else return insize; - } - else - return insize; - - if (code & 0x8000) { - /* Codeset 2 */ - WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) - NEXT(insize, 3) - } else { - /* Codeset 1 */ - WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) - NEXT(insize, 2) - } - } - - return 0; + while (inleft > 0) { + ucs4_t c = IN1; + DBCHAR code; + Py_ssize_t insize; + + if (c < 0x80) { + WRITE1(c) + NEXT(1, 1) + continue; + } + + DECODE_SURROGATE(c) + insize = GET_INSIZE(c); + + if (c <= 0xFFFF) { + EMULATE_JISX0213_2000_ENCODE_BMP(code, c) + else TRYMAP_ENC(jisx0213_bmp, code, c) { + if (code == MULTIC) { + if (inleft < 2) { + if (flags & MBENC_FLUSH) { + code = find_pairencmap( + (ucs2_t)c, 0, + jisx0213_pair_encmap, + JISX0213_ENCPAIRS); + if (code == DBCINV) + return 1; + } + else + return MBERR_TOOFEW; + } + else { + code = find_pairencmap( + (ucs2_t)c, (*inbuf)[1], + jisx0213_pair_encmap, + JISX0213_ENCPAIRS); + if (code == DBCINV) { + code = find_pairencmap( + (ucs2_t)c, 0, + jisx0213_pair_encmap, + JISX0213_ENCPAIRS); + if (code == DBCINV) + return 1; + } else + insize = 2; + } + } + } + else TRYMAP_ENC(jisxcommon, code, c); + else if (c >= 0xff61 && c <= 0xff9f) { + /* JIS X 0201 half-width katakana */ + WRITE2(0x8e, c - 0xfec0) + NEXT(1, 2) + continue; + } + else if (c == 0xff3c) + /* F/W REVERSE SOLIDUS (see NOTES) */ + code = 0x2140; + else if (c == 0xff5e) + /* F/W TILDE (see NOTES) */ + code = 0x2232; + else + return 1; + } + else if (c >> 16 == EMPBASE >> 16) { + EMULATE_JISX0213_2000_ENCODE_EMP(code, c) + else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff); + else return insize; + } + else + return insize; + + if (code & 0x8000) { + /* Codeset 2 */ + WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) + NEXT(insize, 3) + } else { + /* Codeset 1 */ + WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) + NEXT(insize, 2) + } + } + + return 0; } DECODER(euc_jis_2004) { - while (inleft > 0) { - unsigned char c = IN1; - ucs4_t code; - - REQUIRE_OUTBUF(1) - - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } - - if (c == 0x8e) { - /* JIS X 0201 half-width katakana */ - unsigned char c2; - - REQUIRE_INBUF(2) - c2 = IN2; - if (c2 >= 0xa1 && c2 <= 0xdf) { - OUT1(0xfec0 + c2) - NEXT(2, 1) - } - else - return 2; - } - else if (c == 0x8f) { - unsigned char c2, c3; - - REQUIRE_INBUF(3) - c2 = IN2 ^ 0x80; - c3 = IN3 ^ 0x80; - - /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */ - EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3) - else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ; - else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) { - WRITEUCS4(EMPBASE | code) - NEXT_IN(3) - continue; - } - else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; - else return 3; - NEXT(3, 1) - } - else { - unsigned char c2; - - REQUIRE_INBUF(2) - c ^= 0x80; - c2 = IN2 ^ 0x80; - - /* JIS X 0213 Plane 1 */ - EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2) - else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c; - else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e; - else TRYMAP_DEC(jisx0208, **outbuf, c, c2); - else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); - else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { - WRITEUCS4(EMPBASE | code) - NEXT_IN(2) - continue; - } - else TRYMAP_DEC(jisx0213_pair, code, c, c2) { - WRITE2(code >> 16, code & 0xffff) - NEXT(2, 2) - continue; - } - else return 2; - NEXT(2, 1) - } - } - - return 0; + while (inleft > 0) { + unsigned char c = IN1; + ucs4_t code; + + REQUIRE_OUTBUF(1) + + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } + + if (c == 0x8e) { + /* JIS X 0201 half-width katakana */ + unsigned char c2; + + REQUIRE_INBUF(2) + c2 = IN2; + if (c2 >= 0xa1 && c2 <= 0xdf) { + OUT1(0xfec0 + c2) + NEXT(2, 1) + } + else + return 2; + } + else if (c == 0x8f) { + unsigned char c2, c3; + + REQUIRE_INBUF(3) + c2 = IN2 ^ 0x80; + c3 = IN3 ^ 0x80; + + /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */ + EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3) + else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ; + else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) { + WRITEUCS4(EMPBASE | code) + NEXT_IN(3) + continue; + } + else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; + else return 3; + NEXT(3, 1) + } + else { + unsigned char c2; + + REQUIRE_INBUF(2) + c ^= 0x80; + c2 = IN2 ^ 0x80; + + /* JIS X 0213 Plane 1 */ + EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2) + else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c; + else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e; + else TRYMAP_DEC(jisx0208, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { + WRITEUCS4(EMPBASE | code) + NEXT_IN(2) + continue; + } + else TRYMAP_DEC(jisx0213_pair, code, c, c2) { + WRITE2(code >> 16, code & 0xffff) + NEXT(2, 2) + continue; + } + else return 2; + NEXT(2, 1) + } + } + + return 0; } @@ -315,114 +315,114 @@ DECODER(euc_jis_2004) ENCODER(euc_jp) { - while (inleft > 0) { - Py_UNICODE c = IN1; - DBCHAR code; - - if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) - continue; - } - - UCS4INVALID(c) - - TRYMAP_ENC(jisxcommon, code, c); - else if (c >= 0xff61 && c <= 0xff9f) { - /* JIS X 0201 half-width katakana */ - WRITE2(0x8e, c - 0xfec0) - NEXT(1, 2) - continue; - } + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + + if (c < 0x80) { + WRITE1((unsigned char)c) + NEXT(1, 1) + continue; + } + + UCS4INVALID(c) + + TRYMAP_ENC(jisxcommon, code, c); + else if (c >= 0xff61 && c <= 0xff9f) { + /* JIS X 0201 half-width katakana */ + WRITE2(0x8e, c - 0xfec0) + NEXT(1, 2) + continue; + } #ifndef STRICT_BUILD - else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */ - code = 0x2140; - else if (c == 0xa5) { /* YEN SIGN */ - WRITE1(0x5c); - NEXT(1, 1) - continue; - } else if (c == 0x203e) { /* OVERLINE */ - WRITE1(0x7e); - NEXT(1, 1) - continue; - } + else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */ + code = 0x2140; + else if (c == 0xa5) { /* YEN SIGN */ + WRITE1(0x5c); + NEXT(1, 1) + continue; + } else if (c == 0x203e) { /* OVERLINE */ + WRITE1(0x7e); + NEXT(1, 1) + continue; + } #endif - else - return 1; - - if (code & 0x8000) { - /* JIS X 0212 */ - WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) - NEXT(1, 3) - } else { - /* JIS X 0208 */ - WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) - NEXT(1, 2) - } - } - - return 0; + else + return 1; + + if (code & 0x8000) { + /* JIS X 0212 */ + WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) + NEXT(1, 3) + } else { + /* JIS X 0208 */ + WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) + NEXT(1, 2) + } + } + + return 0; } DECODER(euc_jp) { - while (inleft > 0) { - unsigned char c = IN1; - - REQUIRE_OUTBUF(1) - - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } - - if (c == 0x8e) { - /* JIS X 0201 half-width katakana */ - unsigned char c2; - - REQUIRE_INBUF(2) - c2 = IN2; - if (c2 >= 0xa1 && c2 <= 0xdf) { - OUT1(0xfec0 + c2) - NEXT(2, 1) - } - else - return 2; - } - else if (c == 0x8f) { - unsigned char c2, c3; - - REQUIRE_INBUF(3) - c2 = IN2; - c3 = IN3; - /* JIS X 0212 */ - TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) { - NEXT(3, 1) - } - else - return 3; - } - else { - unsigned char c2; - - REQUIRE_INBUF(2) - c2 = IN2; - /* JIS X 0208 */ + while (inleft > 0) { + unsigned char c = IN1; + + REQUIRE_OUTBUF(1) + + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } + + if (c == 0x8e) { + /* JIS X 0201 half-width katakana */ + unsigned char c2; + + REQUIRE_INBUF(2) + c2 = IN2; + if (c2 >= 0xa1 && c2 <= 0xdf) { + OUT1(0xfec0 + c2) + NEXT(2, 1) + } + else + return 2; + } + else if (c == 0x8f) { + unsigned char c2, c3; + + REQUIRE_INBUF(3) + c2 = IN2; + c3 = IN3; + /* JIS X 0212 */ + TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) { + NEXT(3, 1) + } + else + return 3; + } + else { + unsigned char c2; + + REQUIRE_INBUF(2) + c2 = IN2; + /* JIS X 0208 */ #ifndef STRICT_BUILD - if (c == 0xa1 && c2 == 0xc0) - /* FULL-WIDTH REVERSE SOLIDUS */ - **outbuf = 0xff3c; - else + if (c == 0xa1 && c2 == 0xc0) + /* FULL-WIDTH REVERSE SOLIDUS */ + **outbuf = 0xff3c; + else #endif - TRYMAP_DEC(jisx0208, **outbuf, - c ^ 0x80, c2 ^ 0x80) ; - else return 2; - NEXT(2, 1) - } - } - - return 0; + TRYMAP_DEC(jisx0208, **outbuf, + c ^ 0x80, c2 ^ 0x80) ; + else return 2; + NEXT(2, 1) + } + } + + return 0; } @@ -432,105 +432,105 @@ DECODER(euc_jp) ENCODER(shift_jis) { - while (inleft > 0) { - Py_UNICODE c = IN1; - DBCHAR code; - unsigned char c1, c2; + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + unsigned char c1, c2; #ifdef STRICT_BUILD - JISX0201_R_ENCODE(c, code) + JISX0201_R_ENCODE(c, code) #else - if (c < 0x80) code = c; - else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */ - else if (c == 0x203e) code = 0x7e; /* OVERLINE */ + if (c < 0x80) code = c; + else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */ + else if (c == 0x203e) code = 0x7e; /* OVERLINE */ #endif - else JISX0201_K_ENCODE(c, code) - else UCS4INVALID(c) - else code = NOCHAR; + else JISX0201_K_ENCODE(c, code) + else UCS4INVALID(c) + else code = NOCHAR; - if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { - REQUIRE_OUTBUF(1) + if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { + REQUIRE_OUTBUF(1) - OUT1((unsigned char)code) - NEXT(1, 1) - continue; - } + OUT1((unsigned char)code) + NEXT(1, 1) + continue; + } - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2) - if (code == NOCHAR) { - TRYMAP_ENC(jisxcommon, code, c); + if (code == NOCHAR) { + TRYMAP_ENC(jisxcommon, code, c); #ifndef STRICT_BUILD - else if (c == 0xff3c) - code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */ + else if (c == 0xff3c) + code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */ #endif - else - return 1; - - if (code & 0x8000) /* MSB set: JIS X 0212 */ - return 1; - } - - c1 = code >> 8; - c2 = code & 0xff; - c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); - c1 = (c1 - 0x21) >> 1; - OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) - OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) - NEXT(1, 2) - } - - return 0; + else + return 1; + + if (code & 0x8000) /* MSB set: JIS X 0212 */ + return 1; + } + + c1 = code >> 8; + c2 = code & 0xff; + c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); + c1 = (c1 - 0x21) >> 1; + OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) + OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) + NEXT(1, 2) + } + + return 0; } DECODER(shift_jis) { - while (inleft > 0) { - unsigned char c = IN1; + while (inleft > 0) { + unsigned char c = IN1; - REQUIRE_OUTBUF(1) + REQUIRE_OUTBUF(1) #ifdef STRICT_BUILD - JISX0201_R_DECODE(c, **outbuf) + JISX0201_R_DECODE(c, **outbuf) #else - if (c < 0x80) **outbuf = c; + if (c < 0x80) **outbuf = c; #endif - else JISX0201_K_DECODE(c, **outbuf) - else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ - unsigned char c1, c2; + else JISX0201_K_DECODE(c, **outbuf) + else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ + unsigned char c1, c2; - REQUIRE_INBUF(2) - c2 = IN2; - if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + REQUIRE_INBUF(2) + c2 = IN2; + if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) + return 2; - c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); - c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); - c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21); - c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; + c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); + c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); + c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21); + c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; #ifndef STRICT_BUILD - if (c1 == 0x21 && c2 == 0x40) { - /* FULL-WIDTH REVERSE SOLIDUS */ - OUT1(0xff3c) - NEXT(2, 1) - continue; - } + if (c1 == 0x21 && c2 == 0x40) { + /* FULL-WIDTH REVERSE SOLIDUS */ + OUT1(0xff3c) + NEXT(2, 1) + continue; + } #endif - TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { - NEXT(2, 1) - continue; - } - else - return 2; - } - else - return 2; - - NEXT(1, 1) /* JIS X 0201 */ - } - - return 0; + TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { + NEXT(2, 1) + continue; + } + else + return 2; + } + else + return 2; + + NEXT(1, 1) /* JIS X 0201 */ + } + + return 0; } @@ -540,167 +540,167 @@ DECODER(shift_jis) ENCODER(shift_jis_2004) { - while (inleft > 0) { - ucs4_t c = IN1; - DBCHAR code = NOCHAR; - int c1, c2; - Py_ssize_t insize; - - JISX0201_ENCODE(c, code) - else DECODE_SURROGATE(c) - - if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { - WRITE1((unsigned char)code) - NEXT(1, 1) - continue; - } - - REQUIRE_OUTBUF(2) - insize = GET_INSIZE(c); - - if (code == NOCHAR) { - if (c <= 0xffff) { - EMULATE_JISX0213_2000_ENCODE_BMP(code, c) - else TRYMAP_ENC(jisx0213_bmp, code, c) { - if (code == MULTIC) { - if (inleft < 2) { - if (flags & MBENC_FLUSH) { - code = find_pairencmap - ((ucs2_t)c, 0, - jisx0213_pair_encmap, - JISX0213_ENCPAIRS); - if (code == DBCINV) - return 1; - } - else - return MBERR_TOOFEW; - } - else { - code = find_pairencmap( - (ucs2_t)c, IN2, - jisx0213_pair_encmap, - JISX0213_ENCPAIRS); - if (code == DBCINV) { - code = find_pairencmap( - (ucs2_t)c, 0, - jisx0213_pair_encmap, - JISX0213_ENCPAIRS); - if (code == DBCINV) - return 1; - } - else - insize = 2; - } - } - } - else TRYMAP_ENC(jisxcommon, code, c) { - /* abandon JIS X 0212 codes */ - if (code & 0x8000) - return 1; - } - else return 1; - } - else if (c >> 16 == EMPBASE >> 16) { - EMULATE_JISX0213_2000_ENCODE_EMP(code, c) - else TRYMAP_ENC(jisx0213_emp, code, c&0xffff); - else return insize; - } - else - return insize; - } - - c1 = code >> 8; - c2 = (code & 0xff) - 0x21; - - if (c1 & 0x80) { /* Plane 2 */ - if (c1 >= 0xee) c1 -= 0x87; - else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49; - else c1 -= 0x43; - } - else /* Plane 1 */ - c1 -= 0x21; - - if (c1 & 1) c2 += 0x5e; - c1 >>= 1; - OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1)) - OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41)) - - NEXT(insize, 2) - } - - return 0; + while (inleft > 0) { + ucs4_t c = IN1; + DBCHAR code = NOCHAR; + int c1, c2; + Py_ssize_t insize; + + JISX0201_ENCODE(c, code) + else DECODE_SURROGATE(c) + + if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { + WRITE1((unsigned char)code) + NEXT(1, 1) + continue; + } + + REQUIRE_OUTBUF(2) + insize = GET_INSIZE(c); + + if (code == NOCHAR) { + if (c <= 0xffff) { + EMULATE_JISX0213_2000_ENCODE_BMP(code, c) + else TRYMAP_ENC(jisx0213_bmp, code, c) { + if (code == MULTIC) { + if (inleft < 2) { + if (flags & MBENC_FLUSH) { + code = find_pairencmap + ((ucs2_t)c, 0, + jisx0213_pair_encmap, + JISX0213_ENCPAIRS); + if (code == DBCINV) + return 1; + } + else + return MBERR_TOOFEW; + } + else { + code = find_pairencmap( + (ucs2_t)c, IN2, + jisx0213_pair_encmap, + JISX0213_ENCPAIRS); + if (code == DBCINV) { + code = find_pairencmap( + (ucs2_t)c, 0, + jisx0213_pair_encmap, + JISX0213_ENCPAIRS); + if (code == DBCINV) + return 1; + } + else + insize = 2; + } + } + } + else TRYMAP_ENC(jisxcommon, code, c) { + /* abandon JIS X 0212 codes */ + if (code & 0x8000) + return 1; + } + else return 1; + } + else if (c >> 16 == EMPBASE >> 16) { + EMULATE_JISX0213_2000_ENCODE_EMP(code, c) + else TRYMAP_ENC(jisx0213_emp, code, c&0xffff); + else return insize; + } + else + return insize; + } + + c1 = code >> 8; + c2 = (code & 0xff) - 0x21; + + if (c1 & 0x80) { /* Plane 2 */ + if (c1 >= 0xee) c1 -= 0x87; + else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49; + else c1 -= 0x43; + } + else /* Plane 1 */ + c1 -= 0x21; + + if (c1 & 1) c2 += 0x5e; + c1 >>= 1; + OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1)) + OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41)) + + NEXT(insize, 2) + } + + return 0; } DECODER(shift_jis_2004) { - while (inleft > 0) { - unsigned char c = IN1; - - REQUIRE_OUTBUF(1) - JISX0201_DECODE(c, **outbuf) - else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){ - unsigned char c1, c2; - ucs4_t code; - - REQUIRE_INBUF(2) - c2 = IN2; - if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; - - c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); - c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); - c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1)); - c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; - - if (c1 < 0x5e) { /* Plane 1 */ - c1 += 0x21; - EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, - c1, c2) - else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { - NEXT_OUT(1) - } - else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, - c1, c2) { - NEXT_OUT(1) - } - else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) { - WRITEUCS4(EMPBASE | code) - } - else TRYMAP_DEC(jisx0213_pair, code, c1, c2) { - WRITE2(code >> 16, code & 0xffff) - NEXT_OUT(2) - } - else - return 2; - NEXT_IN(2) - } - else { /* Plane 2 */ - if (c1 >= 0x67) c1 += 0x07; - else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37; - else c1 -= 0x3d; - - EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, - c1, c2) - else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, - c1, c2) ; - else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) { - WRITEUCS4(EMPBASE | code) - NEXT_IN(2) - continue; - } - else - return 2; - NEXT(2, 1) - } - continue; - } - else - return 2; - - NEXT(1, 1) /* JIS X 0201 */ - } - - return 0; + while (inleft > 0) { + unsigned char c = IN1; + + REQUIRE_OUTBUF(1) + JISX0201_DECODE(c, **outbuf) + else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){ + unsigned char c1, c2; + ucs4_t code; + + REQUIRE_INBUF(2) + c2 = IN2; + if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) + return 2; + + c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); + c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); + c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1)); + c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; + + if (c1 < 0x5e) { /* Plane 1 */ + c1 += 0x21; + EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, + c1, c2) + else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { + NEXT_OUT(1) + } + else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, + c1, c2) { + NEXT_OUT(1) + } + else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) { + WRITEUCS4(EMPBASE | code) + } + else TRYMAP_DEC(jisx0213_pair, code, c1, c2) { + WRITE2(code >> 16, code & 0xffff) + NEXT_OUT(2) + } + else + return 2; + NEXT_IN(2) + } + else { /* Plane 2 */ + if (c1 >= 0x67) c1 += 0x07; + else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37; + else c1 -= 0x3d; + + EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, + c1, c2) + else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, + c1, c2) ; + else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) { + WRITEUCS4(EMPBASE | code) + NEXT_IN(2) + continue; + } + else + return 2; + NEXT(2, 1) + } + continue; + } + else + return 2; + + NEXT(1, 1) /* JIS X 0201 */ + } + + return 0; } diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c index 161967e..9272e36 100644 --- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -11,151 +11,151 @@ * EUC-KR codec */ -#define EUCKR_JAMO_FIRSTBYTE 0xA4 -#define EUCKR_JAMO_FILLER 0xD4 +#define EUCKR_JAMO_FIRSTBYTE 0xA4 +#define EUCKR_JAMO_FILLER 0xD4 static const unsigned char u2cgk_choseong[19] = { - 0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, - 0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, - 0xbc, 0xbd, 0xbe + 0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, + 0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, + 0xbc, 0xbd, 0xbe }; static const unsigned char u2cgk_jungseong[21] = { - 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, - 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, - 0xcf, 0xd0, 0xd1, 0xd2, 0xd3 + 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, + 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, + 0xcf, 0xd0, 0xd1, 0xd2, 0xd3 }; static const unsigned char u2cgk_jongseong[28] = { - 0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, - 0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba, - 0xbb, 0xbc, 0xbd, 0xbe + 0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, + 0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba, + 0xbb, 0xbc, 0xbd, 0xbe }; ENCODER(euc_kr) { - while (inleft > 0) { - Py_UNICODE c = IN1; - DBCHAR code; - - if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) - continue; - } - UCS4INVALID(c) - - REQUIRE_OUTBUF(2) - TRYMAP_ENC(cp949, code, c); - else return 1; - - if ((code & 0x8000) == 0) { - /* KS X 1001 coded character */ - OUT1((code >> 8) | 0x80) - OUT2((code & 0xFF) | 0x80) - NEXT(1, 2) - } - else { /* Mapping is found in CP949 extension, - * but we encode it in KS X 1001:1998 Annex 3, - * make-up sequence for EUC-KR. */ - - REQUIRE_OUTBUF(8) - - /* syllable composition precedence */ - OUT1(EUCKR_JAMO_FIRSTBYTE) - OUT2(EUCKR_JAMO_FILLER) - - /* All codepoints in CP949 extension are in unicode - * Hangul Syllable area. */ - assert(0xac00 <= c && c <= 0xd7a3); - c -= 0xac00; - - OUT3(EUCKR_JAMO_FIRSTBYTE) - OUT4(u2cgk_choseong[c / 588]) - NEXT_OUT(4) - - OUT1(EUCKR_JAMO_FIRSTBYTE) - OUT2(u2cgk_jungseong[(c / 28) % 21]) - OUT3(EUCKR_JAMO_FIRSTBYTE) - OUT4(u2cgk_jongseong[c % 28]) - NEXT(1, 4) - } - } - - return 0; + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + + if (c < 0x80) { + WRITE1((unsigned char)c) + NEXT(1, 1) + continue; + } + UCS4INVALID(c) + + REQUIRE_OUTBUF(2) + TRYMAP_ENC(cp949, code, c); + else return 1; + + if ((code & 0x8000) == 0) { + /* KS X 1001 coded character */ + OUT1((code >> 8) | 0x80) + OUT2((code & 0xFF) | 0x80) + NEXT(1, 2) + } + else { /* Mapping is found in CP949 extension, + * but we encode it in KS X 1001:1998 Annex 3, + * make-up sequence for EUC-KR. */ + + REQUIRE_OUTBUF(8) + + /* syllable composition precedence */ + OUT1(EUCKR_JAMO_FIRSTBYTE) + OUT2(EUCKR_JAMO_FILLER) + + /* All codepoints in CP949 extension are in unicode + * Hangul Syllable area. */ + assert(0xac00 <= c && c <= 0xd7a3); + c -= 0xac00; + + OUT3(EUCKR_JAMO_FIRSTBYTE) + OUT4(u2cgk_choseong[c / 588]) + NEXT_OUT(4) + + OUT1(EUCKR_JAMO_FIRSTBYTE) + OUT2(u2cgk_jungseong[(c / 28) % 21]) + OUT3(EUCKR_JAMO_FIRSTBYTE) + OUT4(u2cgk_jongseong[c % 28]) + NEXT(1, 4) + } + } + + return 0; } -#define NONE 127 +#define NONE 127 static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */ - 0, 1, NONE, 2, NONE, NONE, 3, 4, - 5, NONE, NONE, NONE, NONE, NONE, NONE, NONE, - 6, 7, 8, NONE, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18 + 0, 1, NONE, 2, NONE, NONE, 3, 4, + 5, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + 6, 7, 8, NONE, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18 }; static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */ - 1, 2, 3, 4, 5, 6, 7, NONE, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, NONE, 18, 19, 20, 21, 22, - NONE, 23, 24, 25, 26, 27 + 1, 2, 3, 4, 5, 6, 7, NONE, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, NONE, 18, 19, 20, 21, 22, + NONE, 23, 24, 25, 26, 27 }; DECODER(euc_kr) { - while (inleft > 0) { - unsigned char c = IN1; - - REQUIRE_OUTBUF(1) - - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } - - REQUIRE_INBUF(2) - - if (c == EUCKR_JAMO_FIRSTBYTE && - IN2 == EUCKR_JAMO_FILLER) { - /* KS X 1001:1998 Annex 3 make-up sequence */ - DBCHAR cho, jung, jong; - - REQUIRE_INBUF(8) - if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE || - (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE || - (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE) - return 8; - - c = (*inbuf)[3]; - if (0xa1 <= c && c <= 0xbe) - cho = cgk2u_choseong[c - 0xa1]; - else - cho = NONE; - - c = (*inbuf)[5]; - jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE; - - c = (*inbuf)[7]; - if (c == EUCKR_JAMO_FILLER) - jong = 0; - else if (0xa1 <= c && c <= 0xbe) - jong = cgk2u_jongseong[c - 0xa1]; - else - jong = NONE; - - if (cho == NONE || jung == NONE || jong == NONE) - return 8; - - OUT1(0xac00 + cho*588 + jung*28 + jong); - NEXT(8, 1) - } - else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) { - NEXT(2, 1) - } - else - return 2; - } - - return 0; + while (inleft > 0) { + unsigned char c = IN1; + + REQUIRE_OUTBUF(1) + + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } + + REQUIRE_INBUF(2) + + if (c == EUCKR_JAMO_FIRSTBYTE && + IN2 == EUCKR_JAMO_FILLER) { + /* KS X 1001:1998 Annex 3 make-up sequence */ + DBCHAR cho, jung, jong; + + REQUIRE_INBUF(8) + if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE || + (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE || + (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE) + return 8; + + c = (*inbuf)[3]; + if (0xa1 <= c && c <= 0xbe) + cho = cgk2u_choseong[c - 0xa1]; + else + cho = NONE; + + c = (*inbuf)[5]; + jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE; + + c = (*inbuf)[7]; + if (c == EUCKR_JAMO_FILLER) + jong = 0; + else if (0xa1 <= c && c <= 0xbe) + jong = cgk2u_jongseong[c - 0xa1]; + else + jong = NONE; + + if (cho == NONE || jung == NONE || jong == NONE) + return 8; + + OUT1(0xac00 + cho*588 + jung*28 + jong); + NEXT(8, 1) + } + else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) { + NEXT(2, 1) + } + else + return 2; + } + + return 0; } #undef NONE @@ -166,54 +166,54 @@ DECODER(euc_kr) ENCODER(cp949) { - while (inleft > 0) { - Py_UNICODE c = IN1; - DBCHAR code; - - if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) - continue; - } - UCS4INVALID(c) - - REQUIRE_OUTBUF(2) - TRYMAP_ENC(cp949, code, c); - else return 1; - - OUT1((code >> 8) | 0x80) - if (code & 0x8000) - OUT2(code & 0xFF) /* MSB set: CP949 */ - else - OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */ - NEXT(1, 2) - } - - return 0; + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + + if (c < 0x80) { + WRITE1((unsigned char)c) + NEXT(1, 1) + continue; + } + UCS4INVALID(c) + + REQUIRE_OUTBUF(2) + TRYMAP_ENC(cp949, code, c); + else return 1; + + OUT1((code >> 8) | 0x80) + if (code & 0x8000) + OUT2(code & 0xFF) /* MSB set: CP949 */ + else + OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */ + NEXT(1, 2) + } + + return 0; } DECODER(cp949) { - while (inleft > 0) { - unsigned char c = IN1; + while (inleft > 0) { + unsigned char c = IN1; - REQUIRE_OUTBUF(1) + REQUIRE_OUTBUF(1) - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } - REQUIRE_INBUF(2) - TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80); - else TRYMAP_DEC(cp949ext, **outbuf, c, IN2); - else return 2; + REQUIRE_INBUF(2) + TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80); + else TRYMAP_DEC(cp949ext, **outbuf, c, IN2); + else return 2; - NEXT(2, 1) - } + NEXT(2, 1) + } - return 0; + return 0; } @@ -250,58 +250,58 @@ static const DBCHAR u2johabjamo[] = { ENCODER(johab) { - while (inleft > 0) { - Py_UNICODE c = IN1; - DBCHAR code; - - if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) - continue; - } - UCS4INVALID(c) - - REQUIRE_OUTBUF(2) - - if (c >= 0xac00 && c <= 0xd7a3) { - c -= 0xac00; - code = 0x8000 | - (u2johabidx_choseong[c / 588] << 10) | - (u2johabidx_jungseong[(c / 28) % 21] << 5) | - u2johabidx_jongseong[c % 28]; - } - else if (c >= 0x3131 && c <= 0x3163) - code = u2johabjamo[c - 0x3131]; - else TRYMAP_ENC(cp949, code, c) { - unsigned char c1, c2, t2; - unsigned short t1; - - assert((code & 0x8000) == 0); - c1 = code >> 8; - c2 = code & 0xff; - if (((c1 >= 0x21 && c1 <= 0x2c) || - (c1 >= 0x4a && c1 <= 0x7d)) && - (c2 >= 0x21 && c2 <= 0x7e)) { - t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : - (c1 - 0x21 + 0x197)); - t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21); - OUT1(t1 >> 1) - OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43) - NEXT(1, 2) - continue; - } - else - return 1; - } - else - return 1; - - OUT1(code >> 8) - OUT2(code & 0xff) - NEXT(1, 2) - } - - return 0; + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + + if (c < 0x80) { + WRITE1((unsigned char)c) + NEXT(1, 1) + continue; + } + UCS4INVALID(c) + + REQUIRE_OUTBUF(2) + + if (c >= 0xac00 && c <= 0xd7a3) { + c -= 0xac00; + code = 0x8000 | + (u2johabidx_choseong[c / 588] << 10) | + (u2johabidx_jungseong[(c / 28) % 21] << 5) | + u2johabidx_jongseong[c % 28]; + } + else if (c >= 0x3131 && c <= 0x3163) + code = u2johabjamo[c - 0x3131]; + else TRYMAP_ENC(cp949, code, c) { + unsigned char c1, c2, t2; + unsigned short t1; + + assert((code & 0x8000) == 0); + c1 = code >> 8; + c2 = code & 0xff; + if (((c1 >= 0x21 && c1 <= 0x2c) || + (c1 >= 0x4a && c1 <= 0x7d)) && + (c2 >= 0x21 && c2 <= 0x7e)) { + t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : + (c1 - 0x21 + 0x197)); + t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21); + OUT1(t1 >> 1) + OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43) + NEXT(1, 2) + continue; + } + else + return 1; + } + else + return 1; + + OUT1(code >> 8) + OUT2(code & 0xff) + NEXT(1, 2) + } + + return 0; } #define FILL 0xfd @@ -347,91 +347,91 @@ static const unsigned char johabjamo_jongseong[32] = { DECODER(johab) { - while (inleft > 0) { - unsigned char c = IN1, c2; - - REQUIRE_OUTBUF(1) - - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } - - REQUIRE_INBUF(2) - c2 = IN2; - - if (c < 0xd8) { - /* johab hangul */ - unsigned char c_cho, c_jung, c_jong; - unsigned char i_cho, i_jung, i_jong; - - c_cho = (c >> 2) & 0x1f; - c_jung = ((c << 3) | c2 >> 5) & 0x1f; - c_jong = c2 & 0x1f; - - i_cho = johabidx_choseong[c_cho]; - i_jung = johabidx_jungseong[c_jung]; - i_jong = johabidx_jongseong[c_jong]; - - if (i_cho == NONE || i_jung == NONE || i_jong == NONE) - return 2; - - /* we don't use U+1100 hangul jamo yet. */ - if (i_cho == FILL) { - if (i_jung == FILL) { - if (i_jong == FILL) - OUT1(0x3000) - else - OUT1(0x3100 | - johabjamo_jongseong[c_jong]) - } - else { - if (i_jong == FILL) - OUT1(0x3100 | - johabjamo_jungseong[c_jung]) - else - return 2; - } - } else { - if (i_jung == FILL) { - if (i_jong == FILL) - OUT1(0x3100 | - johabjamo_choseong[c_cho]) - else - return 2; - } - else - OUT1(0xac00 + - i_cho * 588 + - i_jung * 28 + - (i_jong == FILL ? 0 : i_jong)) - } - NEXT(2, 1) - } else { - /* KS X 1001 except hangul jamos and syllables */ - if (c == 0xdf || c > 0xf9 || - c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) || - (c2 & 0x7f) == 0x7f || - (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3))) - return 2; - else { - unsigned char t1, t2; - - t1 = (c < 0xe0 ? 2 * (c - 0xd9) : - 2 * c - 0x197); - t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43); - t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21; - t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21; - - TRYMAP_DEC(ksx1001, **outbuf, t1, t2); - else return 2; - NEXT(2, 1) - } - } - } - - return 0; + while (inleft > 0) { + unsigned char c = IN1, c2; + + REQUIRE_OUTBUF(1) + + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } + + REQUIRE_INBUF(2) + c2 = IN2; + + if (c < 0xd8) { + /* johab hangul */ + unsigned char c_cho, c_jung, c_jong; + unsigned char i_cho, i_jung, i_jong; + + c_cho = (c >> 2) & 0x1f; + c_jung = ((c << 3) | c2 >> 5) & 0x1f; + c_jong = c2 & 0x1f; + + i_cho = johabidx_choseong[c_cho]; + i_jung = johabidx_jungseong[c_jung]; + i_jong = johabidx_jongseong[c_jong]; + + if (i_cho == NONE || i_jung == NONE || i_jong == NONE) + return 2; + + /* we don't use U+1100 hangul jamo yet. */ + if (i_cho == FILL) { + if (i_jung == FILL) { + if (i_jong == FILL) + OUT1(0x3000) + else + OUT1(0x3100 | + johabjamo_jongseong[c_jong]) + } + else { + if (i_jong == FILL) + OUT1(0x3100 | + johabjamo_jungseong[c_jung]) + else + return 2; + } + } else { + if (i_jung == FILL) { + if (i_jong == FILL) + OUT1(0x3100 | + johabjamo_choseong[c_cho]) + else + return 2; + } + else + OUT1(0xac00 + + i_cho * 588 + + i_jung * 28 + + (i_jong == FILL ? 0 : i_jong)) + } + NEXT(2, 1) + } else { + /* KS X 1001 except hangul jamos and syllables */ + if (c == 0xdf || c > 0xf9 || + c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) || + (c2 & 0x7f) == 0x7f || + (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3))) + return 2; + else { + unsigned char t1, t2; + + t1 = (c < 0xe0 ? 2 * (c - 0xd9) : + 2 * c - 0x197); + t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43); + t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21; + t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21; + + TRYMAP_DEC(ksx1001, **outbuf, t1, t2); + else return 2; + NEXT(2, 1) + } + } + } + + return 0; } #undef NONE #undef FILL diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c index 8ccbca1..38cf723 100644 --- a/Modules/cjkcodecs/_codecs_tw.c +++ b/Modules/cjkcodecs/_codecs_tw.c @@ -13,52 +13,52 @@ ENCODER(big5) { - while (inleft > 0) { - Py_UNICODE c = **inbuf; - DBCHAR code; + while (inleft > 0) { + Py_UNICODE c = **inbuf; + DBCHAR code; - if (c < 0x80) { - REQUIRE_OUTBUF(1) - **outbuf = (unsigned char)c; - NEXT(1, 1) - continue; - } - UCS4INVALID(c) + if (c < 0x80) { + REQUIRE_OUTBUF(1) + **outbuf = (unsigned char)c; + NEXT(1, 1) + continue; + } + UCS4INVALID(c) - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2) - TRYMAP_ENC(big5, code, c); - else return 1; + TRYMAP_ENC(big5, code, c); + else return 1; - OUT1(code >> 8) - OUT2(code & 0xFF) - NEXT(1, 2) - } + OUT1(code >> 8) + OUT2(code & 0xFF) + NEXT(1, 2) + } - return 0; + return 0; } DECODER(big5) { - while (inleft > 0) { - unsigned char c = IN1; + while (inleft > 0) { + unsigned char c = IN1; - REQUIRE_OUTBUF(1) + REQUIRE_OUTBUF(1) - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } - REQUIRE_INBUF(2) - TRYMAP_DEC(big5, **outbuf, c, IN2) { - NEXT(2, 1) - } - else return 2; - } + REQUIRE_INBUF(2) + TRYMAP_DEC(big5, **outbuf, c, IN2) { + NEXT(2, 1) + } + else return 2; + } - return 0; + return 0; } @@ -68,53 +68,53 @@ DECODER(big5) ENCODER(cp950) { - while (inleft > 0) { - Py_UNICODE c = IN1; - DBCHAR code; - - if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) - continue; - } - UCS4INVALID(c) - - REQUIRE_OUTBUF(2) - TRYMAP_ENC(cp950ext, code, c); - else TRYMAP_ENC(big5, code, c); - else return 1; - - OUT1(code >> 8) - OUT2(code & 0xFF) - NEXT(1, 2) - } - - return 0; + while (inleft > 0) { + Py_UNICODE c = IN1; + DBCHAR code; + + if (c < 0x80) { + WRITE1((unsigned char)c) + NEXT(1, 1) + continue; + } + UCS4INVALID(c) + + REQUIRE_OUTBUF(2) + TRYMAP_ENC(cp950ext, code, c); + else TRYMAP_ENC(big5, code, c); + else return 1; + + OUT1(code >> 8) + OUT2(code & 0xFF) + NEXT(1, 2) + } + + return 0; } DECODER(cp950) { - while (inleft > 0) { - unsigned char c = IN1; + while (inleft > 0) { + unsigned char c = IN1; - REQUIRE_OUTBUF(1) + REQUIRE_OUTBUF(1) - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } - REQUIRE_INBUF(2) + REQUIRE_INBUF(2) - TRYMAP_DEC(cp950ext, **outbuf, c, IN2); - else TRYMAP_DEC(big5, **outbuf, c, IN2); - else return 2; + TRYMAP_DEC(cp950ext, **outbuf, c, IN2); + else TRYMAP_DEC(big5, **outbuf, c, IN2); + else return 2; - NEXT(2, 1) - } + NEXT(2, 1) + } - return 0; + return 0; } diff --git a/Modules/cjkcodecs/alg_jisx0201.h b/Modules/cjkcodecs/alg_jisx0201.h index 1fca06b..0bc7db5 100644 --- a/Modules/cjkcodecs/alg_jisx0201.h +++ b/Modules/cjkcodecs/alg_jisx0201.h @@ -1,24 +1,24 @@ -#define JISX0201_R_ENCODE(c, assi) \ - if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \ - (assi) = (c); \ - else if ((c) == 0x00a5) (assi) = 0x5c; \ - else if ((c) == 0x203e) (assi) = 0x7e; -#define JISX0201_K_ENCODE(c, assi) \ - if ((c) >= 0xff61 && (c) <= 0xff9f) \ - (assi) = (c) - 0xfec0; -#define JISX0201_ENCODE(c, assi) \ - JISX0201_R_ENCODE(c, assi) \ - else JISX0201_K_ENCODE(c, assi) +#define JISX0201_R_ENCODE(c, assi) \ + if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \ + (assi) = (c); \ + else if ((c) == 0x00a5) (assi) = 0x5c; \ + else if ((c) == 0x203e) (assi) = 0x7e; +#define JISX0201_K_ENCODE(c, assi) \ + if ((c) >= 0xff61 && (c) <= 0xff9f) \ + (assi) = (c) - 0xfec0; +#define JISX0201_ENCODE(c, assi) \ + JISX0201_R_ENCODE(c, assi) \ + else JISX0201_K_ENCODE(c, assi) -#define JISX0201_R_DECODE(c, assi) \ - if ((c) < 0x5c) (assi) = (c); \ - else if ((c) == 0x5c) (assi) = 0x00a5; \ - else if ((c) < 0x7e) (assi) = (c); \ - else if ((c) == 0x7e) (assi) = 0x203e; \ - else if ((c) == 0x7f) (assi) = 0x7f; -#define JISX0201_K_DECODE(c, assi) \ - if ((c) >= 0xa1 && (c) <= 0xdf) \ - (assi) = 0xfec0 + (c); -#define JISX0201_DECODE(c, assi) \ - JISX0201_R_DECODE(c, assi) \ - else JISX0201_K_DECODE(c, assi) +#define JISX0201_R_DECODE(c, assi) \ + if ((c) < 0x5c) (assi) = (c); \ + else if ((c) == 0x5c) (assi) = 0x00a5; \ + else if ((c) < 0x7e) (assi) = (c); \ + else if ((c) == 0x7e) (assi) = 0x203e; \ + else if ((c) == 0x7f) (assi) = 0x7f; +#define JISX0201_K_DECODE(c, assi) \ + if ((c) >= 0xa1 && (c) <= 0xdf) \ + (assi) = 0xfec0 + (c); +#define JISX0201_DECODE(c, assi) \ + JISX0201_R_DECODE(c, assi) \ + else JISX0201_K_DECODE(c, assi) diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index e630671..ab0682a 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -13,12 +13,12 @@ /* a unicode "undefined" codepoint */ -#define UNIINV 0xFFFE +#define UNIINV 0xFFFE /* internal-use DBCS codepoints which aren't used by any charsets */ -#define NOCHAR 0xFFFF -#define MULTIC 0xFFFE -#define DBCINV 0xFFFD +#define NOCHAR 0xFFFF +#define MULTIC 0xFFFE +#define DBCINV 0xFFFD /* shorter macros to save source size of mapping tables */ #define U UNIINV @@ -27,94 +27,94 @@ #define D DBCINV struct dbcs_index { - const ucs2_t *map; - unsigned char bottom, top; + const ucs2_t *map; + unsigned char bottom, top; }; typedef struct dbcs_index decode_map; struct widedbcs_index { - const ucs4_t *map; - unsigned char bottom, top; + const ucs4_t *map; + unsigned char bottom, top; }; typedef struct widedbcs_index widedecode_map; struct unim_index { - const DBCHAR *map; - unsigned char bottom, top; + const DBCHAR *map; + unsigned char bottom, top; }; typedef struct unim_index encode_map; struct unim_index_bytebased { - const unsigned char *map; - unsigned char bottom, top; + const unsigned char *map; + unsigned char bottom, top; }; struct dbcs_map { - const char *charset; - const struct unim_index *encmap; - const struct dbcs_index *decmap; + const char *charset; + const struct unim_index *encmap; + const struct dbcs_index *decmap; }; struct pair_encodemap { - ucs4_t uniseq; - DBCHAR code; + ucs4_t uniseq; + DBCHAR code; }; static const MultibyteCodec *codec_list; static const struct dbcs_map *mapping_list; -#define CODEC_INIT(encoding) \ - static int encoding##_codec_init(const void *config) - -#define ENCODER_INIT(encoding) \ - static int encoding##_encode_init( \ - MultibyteCodec_State *state, const void *config) -#define ENCODER(encoding) \ - static Py_ssize_t encoding##_encode( \ - MultibyteCodec_State *state, const void *config, \ - const Py_UNICODE **inbuf, Py_ssize_t inleft, \ - unsigned char **outbuf, Py_ssize_t outleft, int flags) -#define ENCODER_RESET(encoding) \ - static Py_ssize_t encoding##_encode_reset( \ - MultibyteCodec_State *state, const void *config, \ - unsigned char **outbuf, Py_ssize_t outleft) - -#define DECODER_INIT(encoding) \ - static int encoding##_decode_init( \ - MultibyteCodec_State *state, const void *config) -#define DECODER(encoding) \ - static Py_ssize_t encoding##_decode( \ - MultibyteCodec_State *state, const void *config, \ - const unsigned char **inbuf, Py_ssize_t inleft, \ - Py_UNICODE **outbuf, Py_ssize_t outleft) -#define DECODER_RESET(encoding) \ - static Py_ssize_t encoding##_decode_reset( \ - MultibyteCodec_State *state, const void *config) +#define CODEC_INIT(encoding) \ + static int encoding##_codec_init(const void *config) + +#define ENCODER_INIT(encoding) \ + static int encoding##_encode_init( \ + MultibyteCodec_State *state, const void *config) +#define ENCODER(encoding) \ + static Py_ssize_t encoding##_encode( \ + MultibyteCodec_State *state, const void *config, \ + const Py_UNICODE **inbuf, Py_ssize_t inleft, \ + unsigned char **outbuf, Py_ssize_t outleft, int flags) +#define ENCODER_RESET(encoding) \ + static Py_ssize_t encoding##_encode_reset( \ + MultibyteCodec_State *state, const void *config, \ + unsigned char **outbuf, Py_ssize_t outleft) + +#define DECODER_INIT(encoding) \ + static int encoding##_decode_init( \ + MultibyteCodec_State *state, const void *config) +#define DECODER(encoding) \ + static Py_ssize_t encoding##_decode( \ + MultibyteCodec_State *state, const void *config, \ + const unsigned char **inbuf, Py_ssize_t inleft, \ + Py_UNICODE **outbuf, Py_ssize_t outleft) +#define DECODER_RESET(encoding) \ + static Py_ssize_t encoding##_decode_reset( \ + MultibyteCodec_State *state, const void *config) #if Py_UNICODE_SIZE == 4 -#define UCS4INVALID(code) \ - if ((code) > 0xFFFF) \ - return 1; +#define UCS4INVALID(code) \ + if ((code) > 0xFFFF) \ + return 1; #else -#define UCS4INVALID(code) \ - if (0) ; +#define UCS4INVALID(code) \ + if (0) ; #endif -#define NEXT_IN(i) \ - (*inbuf) += (i); \ - (inleft) -= (i); -#define NEXT_OUT(o) \ - (*outbuf) += (o); \ - (outleft) -= (o); -#define NEXT(i, o) \ - NEXT_IN(i) NEXT_OUT(o) - -#define REQUIRE_INBUF(n) \ - if (inleft < (n)) \ - return MBERR_TOOFEW; -#define REQUIRE_OUTBUF(n) \ - if (outleft < (n)) \ - return MBERR_TOOSMALL; +#define NEXT_IN(i) \ + (*inbuf) += (i); \ + (inleft) -= (i); +#define NEXT_OUT(o) \ + (*outbuf) += (o); \ + (outleft) -= (o); +#define NEXT(i, o) \ + NEXT_IN(i) NEXT_OUT(o) + +#define REQUIRE_INBUF(n) \ + if (inleft < (n)) \ + return MBERR_TOOFEW; +#define REQUIRE_OUTBUF(n) \ + if (outleft < (n)) \ + return MBERR_TOOSMALL; #define IN1 ((*inbuf)[0]) #define IN2 ((*inbuf)[1]) @@ -126,289 +126,289 @@ static const struct dbcs_map *mapping_list; #define OUT3(c) ((*outbuf)[2]) = (c); #define OUT4(c) ((*outbuf)[3]) = (c); -#define WRITE1(c1) \ - REQUIRE_OUTBUF(1) \ - (*outbuf)[0] = (c1); -#define WRITE2(c1, c2) \ - REQUIRE_OUTBUF(2) \ - (*outbuf)[0] = (c1); \ - (*outbuf)[1] = (c2); -#define WRITE3(c1, c2, c3) \ - REQUIRE_OUTBUF(3) \ - (*outbuf)[0] = (c1); \ - (*outbuf)[1] = (c2); \ - (*outbuf)[2] = (c3); -#define WRITE4(c1, c2, c3, c4) \ - REQUIRE_OUTBUF(4) \ - (*outbuf)[0] = (c1); \ - (*outbuf)[1] = (c2); \ - (*outbuf)[2] = (c3); \ - (*outbuf)[3] = (c4); +#define WRITE1(c1) \ + REQUIRE_OUTBUF(1) \ + (*outbuf)[0] = (c1); +#define WRITE2(c1, c2) \ + REQUIRE_OUTBUF(2) \ + (*outbuf)[0] = (c1); \ + (*outbuf)[1] = (c2); +#define WRITE3(c1, c2, c3) \ + REQUIRE_OUTBUF(3) \ + (*outbuf)[0] = (c1); \ + (*outbuf)[1] = (c2); \ + (*outbuf)[2] = (c3); +#define WRITE4(c1, c2, c3, c4) \ + REQUIRE_OUTBUF(4) \ + (*outbuf)[0] = (c1); \ + (*outbuf)[1] = (c2); \ + (*outbuf)[2] = (c3); \ + (*outbuf)[3] = (c4); #if Py_UNICODE_SIZE == 2 -# define WRITEUCS4(c) \ - REQUIRE_OUTBUF(2) \ - (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \ - (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \ - NEXT_OUT(2) +# define WRITEUCS4(c) \ + REQUIRE_OUTBUF(2) \ + (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \ + (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \ + NEXT_OUT(2) #else -# define WRITEUCS4(c) \ - REQUIRE_OUTBUF(1) \ - **outbuf = (Py_UNICODE)(c); \ - NEXT_OUT(1) +# define WRITEUCS4(c) \ + REQUIRE_OUTBUF(1) \ + **outbuf = (Py_UNICODE)(c); \ + NEXT_OUT(1) #endif -#define _TRYMAP_ENC(m, assi, val) \ - ((m)->map != NULL && (val) >= (m)->bottom && \ - (val)<= (m)->top && ((assi) = (m)->map[(val) - \ - (m)->bottom]) != NOCHAR) -#define TRYMAP_ENC_COND(charset, assi, uni) \ - _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff) -#define TRYMAP_ENC(charset, assi, uni) \ - if TRYMAP_ENC_COND(charset, assi, uni) - -#define _TRYMAP_DEC(m, assi, val) \ - ((m)->map != NULL && (val) >= (m)->bottom && \ - (val)<= (m)->top && ((assi) = (m)->map[(val) - \ - (m)->bottom]) != UNIINV) -#define TRYMAP_DEC(charset, assi, c1, c2) \ - if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2) - -#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \ - ((m)->map != NULL && (val) >= (m)->bottom && \ - (val)<= (m)->top && \ - ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \ - (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \ - (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1)) -#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \ - if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \ - assplane, asshi, asslo, (uni) & 0xff) -#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \ - if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2) +#define _TRYMAP_ENC(m, assi, val) \ + ((m)->map != NULL && (val) >= (m)->bottom && \ + (val)<= (m)->top && ((assi) = (m)->map[(val) - \ + (m)->bottom]) != NOCHAR) +#define TRYMAP_ENC_COND(charset, assi, uni) \ + _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff) +#define TRYMAP_ENC(charset, assi, uni) \ + if TRYMAP_ENC_COND(charset, assi, uni) + +#define _TRYMAP_DEC(m, assi, val) \ + ((m)->map != NULL && (val) >= (m)->bottom && \ + (val)<= (m)->top && ((assi) = (m)->map[(val) - \ + (m)->bottom]) != UNIINV) +#define TRYMAP_DEC(charset, assi, c1, c2) \ + if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2) + +#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \ + ((m)->map != NULL && (val) >= (m)->bottom && \ + (val)<= (m)->top && \ + ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \ + (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \ + (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1)) +#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \ + if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \ + assplane, asshi, asslo, (uni) & 0xff) +#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \ + if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2) #if Py_UNICODE_SIZE == 2 -#define DECODE_SURROGATE(c) \ - if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \ - REQUIRE_INBUF(2) \ - if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \ - c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \ - ((ucs4_t)(IN2) - 0xdc00); \ - } \ - } -#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1) +#define DECODE_SURROGATE(c) \ + if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \ + REQUIRE_INBUF(2) \ + if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \ + c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \ + ((ucs4_t)(IN2) - 0xdc00); \ + } \ + } +#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1) #else #define DECODE_SURROGATE(c) {;} -#define GET_INSIZE(c) 1 +#define GET_INSIZE(c) 1 #endif #define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = { #define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL}, #define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap}, #define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap}, -#define END_MAPPINGS_LIST \ - {"", NULL, NULL} }; \ - static const struct dbcs_map *mapping_list = \ - (const struct dbcs_map *)_mapping_list; +#define END_MAPPINGS_LIST \ + {"", NULL, NULL} }; \ + static const struct dbcs_map *mapping_list = \ + (const struct dbcs_map *)_mapping_list; #define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = { -#define _STATEFUL_METHODS(enc) \ - enc##_encode, \ - enc##_encode_init, \ - enc##_encode_reset, \ - enc##_decode, \ - enc##_decode_init, \ - enc##_decode_reset, -#define _STATELESS_METHODS(enc) \ - enc##_encode, NULL, NULL, \ - enc##_decode, NULL, NULL, -#define CODEC_STATEFUL(enc) { \ - #enc, NULL, NULL, \ - _STATEFUL_METHODS(enc) \ +#define _STATEFUL_METHODS(enc) \ + enc##_encode, \ + enc##_encode_init, \ + enc##_encode_reset, \ + enc##_decode, \ + enc##_decode_init, \ + enc##_decode_reset, +#define _STATELESS_METHODS(enc) \ + enc##_encode, NULL, NULL, \ + enc##_decode, NULL, NULL, +#define CODEC_STATEFUL(enc) { \ + #enc, NULL, NULL, \ + _STATEFUL_METHODS(enc) \ }, -#define CODEC_STATELESS(enc) { \ - #enc, NULL, NULL, \ - _STATELESS_METHODS(enc) \ +#define CODEC_STATELESS(enc) { \ + #enc, NULL, NULL, \ + _STATELESS_METHODS(enc) \ }, -#define CODEC_STATELESS_WINIT(enc) { \ - #enc, NULL, \ - enc##_codec_init, \ - _STATELESS_METHODS(enc) \ +#define CODEC_STATELESS_WINIT(enc) { \ + #enc, NULL, \ + enc##_codec_init, \ + _STATELESS_METHODS(enc) \ }, -#define END_CODECS_LIST \ - {"", NULL,} }; \ - static const MultibyteCodec *codec_list = \ - (const MultibyteCodec *)_codec_list; +#define END_CODECS_LIST \ + {"", NULL,} }; \ + static const MultibyteCodec *codec_list = \ + (const MultibyteCodec *)_codec_list; static PyObject * getmultibytecodec(void) { - static PyObject *cofunc = NULL; - - if (cofunc == NULL) { - PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec"); - if (mod == NULL) - return NULL; - cofunc = PyObject_GetAttrString(mod, "__create_codec"); - Py_DECREF(mod); - } - return cofunc; + static PyObject *cofunc = NULL; + + if (cofunc == NULL) { + PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec"); + if (mod == NULL) + return NULL; + cofunc = PyObject_GetAttrString(mod, "__create_codec"); + Py_DECREF(mod); + } + return cofunc; } static PyObject * getcodec(PyObject *self, PyObject *encoding) { - PyObject *codecobj, *r, *cofunc; - const MultibyteCodec *codec; - const char *enc; - - if (!PyUnicode_Check(encoding)) { - PyErr_SetString(PyExc_TypeError, - "encoding name must be a string."); - return NULL; - } - enc = _PyUnicode_AsString(encoding); - if (enc == NULL) - return NULL; - - cofunc = getmultibytecodec(); - if (cofunc == NULL) - return NULL; - - for (codec = codec_list; codec->encoding[0]; codec++) - if (strcmp(codec->encoding, enc) == 0) - break; - - if (codec->encoding[0] == '\0') { - PyErr_SetString(PyExc_LookupError, - "no such codec is supported."); - return NULL; - } - - codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL); - if (codecobj == NULL) - return NULL; - - r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL); - Py_DECREF(codecobj); - - return r; + PyObject *codecobj, *r, *cofunc; + const MultibyteCodec *codec; + const char *enc; + + if (!PyUnicode_Check(encoding)) { + PyErr_SetString(PyExc_TypeError, + "encoding name must be a string."); + return NULL; + } + enc = _PyUnicode_AsString(encoding); + if (enc == NULL) + return NULL; + + cofunc = getmultibytecodec(); + if (cofunc == NULL) + return NULL; + + for (codec = codec_list; codec->encoding[0]; codec++) + if (strcmp(codec->encoding, enc) == 0) + break; + + if (codec->encoding[0] == '\0') { + PyErr_SetString(PyExc_LookupError, + "no such codec is supported."); + return NULL; + } + + codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL); + if (codecobj == NULL) + return NULL; + + r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL); + Py_DECREF(codecobj); + + return r; } static struct PyMethodDef __methods[] = { - {"getcodec", (PyCFunction)getcodec, METH_O, ""}, - {NULL, NULL}, + {"getcodec", (PyCFunction)getcodec, METH_O, ""}, + {NULL, NULL}, }; static int register_maps(PyObject *module) { - const struct dbcs_map *h; - - for (h = mapping_list; h->charset[0] != '\0'; h++) { - char mhname[256] = "__map_"; - int r; - strcpy(mhname + sizeof("__map_") - 1, h->charset); - r = PyModule_AddObject(module, mhname, - PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL)); - if (r == -1) - return -1; - } - return 0; + const struct dbcs_map *h; + + for (h = mapping_list; h->charset[0] != '\0'; h++) { + char mhname[256] = "__map_"; + int r; + strcpy(mhname + sizeof("__map_") - 1, h->charset); + r = PyModule_AddObject(module, mhname, + PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL)); + if (r == -1) + return -1; + } + return 0; } #ifdef USING_BINARY_PAIR_SEARCH static DBCHAR find_pairencmap(ucs2_t body, ucs2_t modifier, - const struct pair_encodemap *haystack, int haystacksize) + const struct pair_encodemap *haystack, int haystacksize) { - int pos, min, max; - ucs4_t value = body << 16 | modifier; - - min = 0; - max = haystacksize; - - for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) - if (value < haystack[pos].uniseq) { - if (max == pos) break; - else max = pos; - } - else if (value > haystack[pos].uniseq) { - if (min == pos) break; - else min = pos; - } - else - break; - - if (value == haystack[pos].uniseq) - return haystack[pos].code; - else - return DBCINV; + int pos, min, max; + ucs4_t value = body << 16 | modifier; + + min = 0; + max = haystacksize; + + for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) + if (value < haystack[pos].uniseq) { + if (max == pos) break; + else max = pos; + } + else if (value > haystack[pos].uniseq) { + if (min == pos) break; + else min = pos; + } + else + break; + + if (value == haystack[pos].uniseq) + return haystack[pos].code; + else + return DBCINV; } #endif #ifdef USING_IMPORTED_MAPS #define IMPORT_MAP(locale, charset, encmap, decmap) \ - importmap("_codecs_" #locale, "__map_" #charset, \ - (const void**)encmap, (const void**)decmap) + importmap("_codecs_" #locale, "__map_" #charset, \ + (const void**)encmap, (const void**)decmap) static int importmap(const char *modname, const char *symbol, - const void **encmap, const void **decmap) + const void **encmap, const void **decmap) { - PyObject *o, *mod; - - mod = PyImport_ImportModule((char *)modname); - if (mod == NULL) - return -1; - - o = PyObject_GetAttrString(mod, (char*)symbol); - if (o == NULL) - goto errorexit; - else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) { - PyErr_SetString(PyExc_ValueError, - "map data must be a Capsule."); - goto errorexit; - } - else { - struct dbcs_map *map; - map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME); - if (encmap != NULL) - *encmap = map->encmap; - if (decmap != NULL) - *decmap = map->decmap; - Py_DECREF(o); - } - - Py_DECREF(mod); - return 0; + PyObject *o, *mod; + + mod = PyImport_ImportModule((char *)modname); + if (mod == NULL) + return -1; + + o = PyObject_GetAttrString(mod, (char*)symbol); + if (o == NULL) + goto errorexit; + else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) { + PyErr_SetString(PyExc_ValueError, + "map data must be a Capsule."); + goto errorexit; + } + else { + struct dbcs_map *map; + map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME); + if (encmap != NULL) + *encmap = map->encmap; + if (decmap != NULL) + *decmap = map->decmap; + Py_DECREF(o); + } + + Py_DECREF(mod); + return 0; errorexit: - Py_DECREF(mod); - return -1; + Py_DECREF(mod); + return -1; } #endif -#define I_AM_A_MODULE_FOR(loc) \ - static struct PyModuleDef __module = { \ - PyModuleDef_HEAD_INIT, \ - "_codecs_"#loc, \ - NULL, \ - 0, \ - __methods, \ - NULL, \ - NULL, \ - NULL, \ - NULL \ - }; \ - PyObject* \ - PyInit__codecs_##loc(void) \ - { \ - PyObject *m = PyModule_Create(&__module); \ - if (m != NULL) \ - (void)register_maps(m); \ - return m; \ - } +#define I_AM_A_MODULE_FOR(loc) \ + static struct PyModuleDef __module = { \ + PyModuleDef_HEAD_INIT, \ + "_codecs_"#loc, \ + NULL, \ + 0, \ + __methods, \ + NULL, \ + NULL, \ + NULL, \ + NULL \ + }; \ + PyObject* \ + PyInit__codecs_##loc(void) \ + { \ + PyObject *m = PyModule_Create(&__module); \ + if (m != NULL) \ + (void)register_maps(m); \ + return m; \ + } #endif diff --git a/Modules/cjkcodecs/emu_jisx0213_2000.h b/Modules/cjkcodecs/emu_jisx0213_2000.h index 250c673..4227fb2 100644 --- a/Modules/cjkcodecs/emu_jisx0213_2000.h +++ b/Modules/cjkcodecs/emu_jisx0213_2000.h @@ -5,39 +5,39 @@ #define EMULATE_JISX0213_2000_ENCODE_INVALID 1 #endif -#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \ - if (config == (void *)2000 && ( \ - (c) == 0x9B1C || (c) == 0x4FF1 || \ - (c) == 0x525D || (c) == 0x541E || \ - (c) == 0x5653 || (c) == 0x59F8 || \ - (c) == 0x5C5B || (c) == 0x5E77 || \ - (c) == 0x7626 || (c) == 0x7E6B)) \ - return EMULATE_JISX0213_2000_ENCODE_INVALID; \ - else if (config == (void *)2000 && (c) == 0x9B1D) \ - (assi) = 0x8000 | 0x7d3b; \ +#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \ + if (config == (void *)2000 && ( \ + (c) == 0x9B1C || (c) == 0x4FF1 || \ + (c) == 0x525D || (c) == 0x541E || \ + (c) == 0x5653 || (c) == 0x59F8 || \ + (c) == 0x5C5B || (c) == 0x5E77 || \ + (c) == 0x7626 || (c) == 0x7E6B)) \ + return EMULATE_JISX0213_2000_ENCODE_INVALID; \ + else if (config == (void *)2000 && (c) == 0x9B1D) \ + (assi) = 0x8000 | 0x7d3b; \ -#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c) \ - if (config == (void *)2000 && (c) == 0x20B9F) \ - return EMULATE_JISX0213_2000_ENCODE_INVALID; +#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c) \ + if (config == (void *)2000 && (c) == 0x20B9F) \ + return EMULATE_JISX0213_2000_ENCODE_INVALID; #ifndef EMULATE_JISX0213_2000_DECODE_INVALID #define EMULATE_JISX0213_2000_DECODE_INVALID 2 #endif -#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) \ - if (config == (void *)2000 && \ - (((c1) == 0x2E && (c2) == 0x21) || \ - ((c1) == 0x2F && (c2) == 0x7E) || \ - ((c1) == 0x4F && (c2) == 0x54) || \ - ((c1) == 0x4F && (c2) == 0x7E) || \ - ((c1) == 0x74 && (c2) == 0x27) || \ - ((c1) == 0x7E && (c2) == 0x7A) || \ - ((c1) == 0x7E && (c2) == 0x7B) || \ - ((c1) == 0x7E && (c2) == 0x7C) || \ - ((c1) == 0x7E && (c2) == 0x7D) || \ - ((c1) == 0x7E && (c2) == 0x7E))) \ - return EMULATE_JISX0213_2000_DECODE_INVALID; +#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) \ + if (config == (void *)2000 && \ + (((c1) == 0x2E && (c2) == 0x21) || \ + ((c1) == 0x2F && (c2) == 0x7E) || \ + ((c1) == 0x4F && (c2) == 0x54) || \ + ((c1) == 0x4F && (c2) == 0x7E) || \ + ((c1) == 0x74 && (c2) == 0x27) || \ + ((c1) == 0x7E && (c2) == 0x7A) || \ + ((c1) == 0x7E && (c2) == 0x7B) || \ + ((c1) == 0x7E && (c2) == 0x7C) || \ + ((c1) == 0x7E && (c2) == 0x7D) || \ + ((c1) == 0x7E && (c2) == 0x7E))) \ + return EMULATE_JISX0213_2000_DECODE_INVALID; -#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \ - if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \ - (assi) = 0x9B1D; +#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \ + if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \ + (assi) = 0x9B1D; diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 1735dfd..af7ea5b 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -45,179 +45,179 @@ static char *incrementalkwarglist[] = {"input", "final", NULL}; static char *streamkwarglist[] = {"stream", "errors", NULL}; static PyObject *multibytecodec_encode(MultibyteCodec *, - MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t, - PyObject *, int); + MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t, + PyObject *, int); -#define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */ +#define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */ static PyObject * make_tuple(PyObject *object, Py_ssize_t len) { - PyObject *v, *w; - - if (object == NULL) - return NULL; - - v = PyTuple_New(2); - if (v == NULL) { - Py_DECREF(object); - return NULL; - } - PyTuple_SET_ITEM(v, 0, object); - - w = PyLong_FromSsize_t(len); - if (w == NULL) { - Py_DECREF(v); - return NULL; - } - PyTuple_SET_ITEM(v, 1, w); - - return v; + PyObject *v, *w; + + if (object == NULL) + return NULL; + + v = PyTuple_New(2); + if (v == NULL) { + Py_DECREF(object); + return NULL; + } + PyTuple_SET_ITEM(v, 0, object); + + w = PyLong_FromSsize_t(len); + if (w == NULL) { + Py_DECREF(v); + return NULL; + } + PyTuple_SET_ITEM(v, 1, w); + + return v; } static PyObject * internal_error_callback(const char *errors) { - if (errors == NULL || strcmp(errors, "strict") == 0) - return ERROR_STRICT; - else if (strcmp(errors, "ignore") == 0) - return ERROR_IGNORE; - else if (strcmp(errors, "replace") == 0) - return ERROR_REPLACE; - else - return PyUnicode_FromString(errors); + if (errors == NULL || strcmp(errors, "strict") == 0) + return ERROR_STRICT; + else if (strcmp(errors, "ignore") == 0) + return ERROR_IGNORE; + else if (strcmp(errors, "replace") == 0) + return ERROR_REPLACE; + else + return PyUnicode_FromString(errors); } static PyObject * call_error_callback(PyObject *errors, PyObject *exc) { - PyObject *args, *cb, *r; - const char *str; - - assert(PyUnicode_Check(errors)); - str = _PyUnicode_AsString(errors); - if (str == NULL) - return NULL; - cb = PyCodec_LookupError(str); - if (cb == NULL) - return NULL; - - args = PyTuple_New(1); - if (args == NULL) { - Py_DECREF(cb); - return NULL; - } - - PyTuple_SET_ITEM(args, 0, exc); - Py_INCREF(exc); - - r = PyObject_CallObject(cb, args); - Py_DECREF(args); - Py_DECREF(cb); - return r; + PyObject *args, *cb, *r; + const char *str; + + assert(PyUnicode_Check(errors)); + str = _PyUnicode_AsString(errors); + if (str == NULL) + return NULL; + cb = PyCodec_LookupError(str); + if (cb == NULL) + return NULL; + + args = PyTuple_New(1); + if (args == NULL) { + Py_DECREF(cb); + return NULL; + } + + PyTuple_SET_ITEM(args, 0, exc); + Py_INCREF(exc); + + r = PyObject_CallObject(cb, args); + Py_DECREF(args); + Py_DECREF(cb); + return r; } static PyObject * codecctx_errors_get(MultibyteStatefulCodecContext *self) { - const char *errors; - - if (self->errors == ERROR_STRICT) - errors = "strict"; - else if (self->errors == ERROR_IGNORE) - errors = "ignore"; - else if (self->errors == ERROR_REPLACE) - errors = "replace"; - else { - Py_INCREF(self->errors); - return self->errors; - } - - return PyUnicode_FromString(errors); + const char *errors; + + if (self->errors == ERROR_STRICT) + errors = "strict"; + else if (self->errors == ERROR_IGNORE) + errors = "ignore"; + else if (self->errors == ERROR_REPLACE) + errors = "replace"; + else { + Py_INCREF(self->errors); + return self->errors; + } + + return PyUnicode_FromString(errors); } static int codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value, - void *closure) + void *closure) { - PyObject *cb; - const char *str; + PyObject *cb; + const char *str; - if (!PyUnicode_Check(value)) { - PyErr_SetString(PyExc_TypeError, "errors must be a string"); - return -1; - } + if (!PyUnicode_Check(value)) { + PyErr_SetString(PyExc_TypeError, "errors must be a string"); + return -1; + } - str = _PyUnicode_AsString(value); - if (str == NULL) - return -1; + str = _PyUnicode_AsString(value); + if (str == NULL) + return -1; - cb = internal_error_callback(str); - if (cb == NULL) - return -1; + cb = internal_error_callback(str); + if (cb == NULL) + return -1; - ERROR_DECREF(self->errors); - self->errors = cb; - return 0; + ERROR_DECREF(self->errors); + self->errors = cb; + return 0; } /* This getset handlers list is used by all the stateful codec objects */ static PyGetSetDef codecctx_getsets[] = { - {"errors", (getter)codecctx_errors_get, - (setter)codecctx_errors_set, - PyDoc_STR("how to treat errors")}, - {NULL,} + {"errors", (getter)codecctx_errors_get, + (setter)codecctx_errors_set, + PyDoc_STR("how to treat errors")}, + {NULL,} }; static int expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) { - Py_ssize_t orgpos, orgsize, incsize; + Py_ssize_t orgpos, orgsize, incsize; - orgpos = (Py_ssize_t)((char *)buf->outbuf - - PyBytes_AS_STRING(buf->outobj)); - orgsize = PyBytes_GET_SIZE(buf->outobj); - incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize); + orgpos = (Py_ssize_t)((char *)buf->outbuf - + PyBytes_AS_STRING(buf->outobj)); + orgsize = PyBytes_GET_SIZE(buf->outobj); + incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize); - if (orgsize > PY_SSIZE_T_MAX - incsize) - return -1; + if (orgsize > PY_SSIZE_T_MAX - incsize) + return -1; - if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1) - return -1; + if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1) + return -1; - buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos; - buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj) - + PyBytes_GET_SIZE(buf->outobj); + buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos; + buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj) + + PyBytes_GET_SIZE(buf->outobj); - return 0; + return 0; } -#define REQUIRE_ENCODEBUFFER(buf, s) { \ - if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ - if (expand_encodebuffer(buf, s) == -1) \ - goto errorexit; \ +#define REQUIRE_ENCODEBUFFER(buf, s) { \ + if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ + if (expand_encodebuffer(buf, s) == -1) \ + goto errorexit; \ } static int expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) { - Py_ssize_t orgpos, orgsize; + Py_ssize_t orgpos, orgsize; - orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); - orgsize = PyUnicode_GET_SIZE(buf->outobj); - if (PyUnicode_Resize(&buf->outobj, orgsize + ( - esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) - return -1; + orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); + orgsize = PyUnicode_GET_SIZE(buf->outobj); + if (PyUnicode_Resize(&buf->outobj, orgsize + ( + esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) + return -1; - buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; - buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) - + PyUnicode_GET_SIZE(buf->outobj); + buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; + buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) + + PyUnicode_GET_SIZE(buf->outobj); - return 0; + return 0; } -#define REQUIRE_DECODEBUFFER(buf, s) { \ - if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ - if (expand_decodebuffer(buf, s) == -1) \ - goto errorexit; \ +#define REQUIRE_DECODEBUFFER(buf, s) { \ + if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ + if (expand_decodebuffer(buf, s) == -1) \ + goto errorexit; \ } @@ -227,504 +227,504 @@ expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) static int multibytecodec_encerror(MultibyteCodec *codec, - MultibyteCodec_State *state, - MultibyteEncodeBuffer *buf, - PyObject *errors, Py_ssize_t e) + MultibyteCodec_State *state, + MultibyteEncodeBuffer *buf, + PyObject *errors, Py_ssize_t e) { - PyObject *retobj = NULL, *retstr = NULL, *tobj; - Py_ssize_t retstrsize, newpos; - Py_ssize_t esize, start, end; - const char *reason; - - if (e > 0) { - reason = "illegal multibyte sequence"; - esize = e; - } - else { - switch (e) { - case MBERR_TOOSMALL: - REQUIRE_ENCODEBUFFER(buf, -1); - return 0; /* retry it */ - case MBERR_TOOFEW: - reason = "incomplete multibyte sequence"; - esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); - break; - case MBERR_INTERNAL: - PyErr_SetString(PyExc_RuntimeError, - "internal codec error"); - return -1; - default: - PyErr_SetString(PyExc_RuntimeError, - "unknown runtime error"); - return -1; - } - } - - if (errors == ERROR_REPLACE) { - const Py_UNICODE replchar = '?', *inbuf = &replchar; - Py_ssize_t r; - - for (;;) { - Py_ssize_t outleft; - - outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); - r = codec->encode(state, codec->config, &inbuf, 1, - &buf->outbuf, outleft, 0); - if (r == MBERR_TOOSMALL) { - REQUIRE_ENCODEBUFFER(buf, -1); - continue; - } - else - break; - } - - if (r != 0) { - REQUIRE_ENCODEBUFFER(buf, 1); - *buf->outbuf++ = '?'; - } - } - if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { - buf->inbuf += esize; - return 0; - } - - start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); - end = start + esize; - - /* use cached exception object if available */ - if (buf->excobj == NULL) { - buf->excobj = PyUnicodeEncodeError_Create(codec->encoding, - buf->inbuf_top, - buf->inbuf_end - buf->inbuf_top, - start, end, reason); - if (buf->excobj == NULL) - goto errorexit; - } - else - if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 || - PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || - PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) - goto errorexit; - - if (errors == ERROR_STRICT) { - PyCodec_StrictErrors(buf->excobj); - goto errorexit; - } - - retobj = call_error_callback(errors, buf->excobj); - if (retobj == NULL) - goto errorexit; - - if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || - !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) || - !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) { - PyErr_SetString(PyExc_TypeError, - "encoding error handler must return " - "(unicode, int) tuple"); - goto errorexit; - } - - { - const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj); - - retstr = multibytecodec_encode(codec, state, &uraw, - PyUnicode_GET_SIZE(tobj), ERROR_STRICT, - MBENC_FLUSH); - if (retstr == NULL) - goto errorexit; - } - - assert(PyBytes_Check(retstr)); - retstrsize = PyBytes_GET_SIZE(retstr); - REQUIRE_ENCODEBUFFER(buf, retstrsize); - - memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize); - buf->outbuf += retstrsize; - - newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); - if (newpos < 0 && !PyErr_Occurred()) - newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); - if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { - PyErr_Clear(); - PyErr_Format(PyExc_IndexError, - "position %zd from error handler out of bounds", - newpos); - goto errorexit; - } - buf->inbuf = buf->inbuf_top + newpos; - - Py_DECREF(retobj); - Py_DECREF(retstr); - return 0; + PyObject *retobj = NULL, *retstr = NULL, *tobj; + Py_ssize_t retstrsize, newpos; + Py_ssize_t esize, start, end; + const char *reason; + + if (e > 0) { + reason = "illegal multibyte sequence"; + esize = e; + } + else { + switch (e) { + case MBERR_TOOSMALL: + REQUIRE_ENCODEBUFFER(buf, -1); + return 0; /* retry it */ + case MBERR_TOOFEW: + reason = "incomplete multibyte sequence"; + esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); + break; + case MBERR_INTERNAL: + PyErr_SetString(PyExc_RuntimeError, + "internal codec error"); + return -1; + default: + PyErr_SetString(PyExc_RuntimeError, + "unknown runtime error"); + return -1; + } + } + + if (errors == ERROR_REPLACE) { + const Py_UNICODE replchar = '?', *inbuf = &replchar; + Py_ssize_t r; + + for (;;) { + Py_ssize_t outleft; + + outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); + r = codec->encode(state, codec->config, &inbuf, 1, + &buf->outbuf, outleft, 0); + if (r == MBERR_TOOSMALL) { + REQUIRE_ENCODEBUFFER(buf, -1); + continue; + } + else + break; + } + + if (r != 0) { + REQUIRE_ENCODEBUFFER(buf, 1); + *buf->outbuf++ = '?'; + } + } + if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { + buf->inbuf += esize; + return 0; + } + + start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); + end = start + esize; + + /* use cached exception object if available */ + if (buf->excobj == NULL) { + buf->excobj = PyUnicodeEncodeError_Create(codec->encoding, + buf->inbuf_top, + buf->inbuf_end - buf->inbuf_top, + start, end, reason); + if (buf->excobj == NULL) + goto errorexit; + } + else + if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 || + PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || + PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) + goto errorexit; + + if (errors == ERROR_STRICT) { + PyCodec_StrictErrors(buf->excobj); + goto errorexit; + } + + retobj = call_error_callback(errors, buf->excobj); + if (retobj == NULL) + goto errorexit; + + if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || + !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) || + !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) { + PyErr_SetString(PyExc_TypeError, + "encoding error handler must return " + "(unicode, int) tuple"); + goto errorexit; + } + + { + const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj); + + retstr = multibytecodec_encode(codec, state, &uraw, + PyUnicode_GET_SIZE(tobj), ERROR_STRICT, + MBENC_FLUSH); + if (retstr == NULL) + goto errorexit; + } + + assert(PyBytes_Check(retstr)); + retstrsize = PyBytes_GET_SIZE(retstr); + REQUIRE_ENCODEBUFFER(buf, retstrsize); + + memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize); + buf->outbuf += retstrsize; + + newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); + if (newpos < 0 && !PyErr_Occurred()) + newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); + if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { + PyErr_Clear(); + PyErr_Format(PyExc_IndexError, + "position %zd from error handler out of bounds", + newpos); + goto errorexit; + } + buf->inbuf = buf->inbuf_top + newpos; + + Py_DECREF(retobj); + Py_DECREF(retstr); + return 0; errorexit: - Py_XDECREF(retobj); - Py_XDECREF(retstr); - return -1; + Py_XDECREF(retobj); + Py_XDECREF(retstr); + return -1; } static int multibytecodec_decerror(MultibyteCodec *codec, - MultibyteCodec_State *state, - MultibyteDecodeBuffer *buf, - PyObject *errors, Py_ssize_t e) + MultibyteCodec_State *state, + MultibyteDecodeBuffer *buf, + PyObject *errors, Py_ssize_t e) { - PyObject *retobj = NULL, *retuni = NULL; - Py_ssize_t retunisize, newpos; - const char *reason; - Py_ssize_t esize, start, end; - - if (e > 0) { - reason = "illegal multibyte sequence"; - esize = e; - } - else { - switch (e) { - case MBERR_TOOSMALL: - REQUIRE_DECODEBUFFER(buf, -1); - return 0; /* retry it */ - case MBERR_TOOFEW: - reason = "incomplete multibyte sequence"; - esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); - break; - case MBERR_INTERNAL: - PyErr_SetString(PyExc_RuntimeError, - "internal codec error"); - return -1; - default: - PyErr_SetString(PyExc_RuntimeError, - "unknown runtime error"); - return -1; - } - } - - if (errors == ERROR_REPLACE) { - REQUIRE_DECODEBUFFER(buf, 1); - *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER; - } - if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { - buf->inbuf += esize; - return 0; - } - - start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); - end = start + esize; - - /* use cached exception object if available */ - if (buf->excobj == NULL) { - buf->excobj = PyUnicodeDecodeError_Create(codec->encoding, - (const char *)buf->inbuf_top, - (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top), - start, end, reason); - if (buf->excobj == NULL) - goto errorexit; - } - else - if (PyUnicodeDecodeError_SetStart(buf->excobj, start) || - PyUnicodeDecodeError_SetEnd(buf->excobj, end) || - PyUnicodeDecodeError_SetReason(buf->excobj, reason)) - goto errorexit; - - if (errors == ERROR_STRICT) { - PyCodec_StrictErrors(buf->excobj); - goto errorexit; - } - - retobj = call_error_callback(errors, buf->excobj); - if (retobj == NULL) - goto errorexit; - - if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || - !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || - !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) { - PyErr_SetString(PyExc_TypeError, - "decoding error handler must return " - "(unicode, int) tuple"); - goto errorexit; - } - - retunisize = PyUnicode_GET_SIZE(retuni); - if (retunisize > 0) { - REQUIRE_DECODEBUFFER(buf, retunisize); - memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni), - retunisize * Py_UNICODE_SIZE); - buf->outbuf += retunisize; - } - - newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); - if (newpos < 0 && !PyErr_Occurred()) - newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); - if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { - PyErr_Clear(); - PyErr_Format(PyExc_IndexError, - "position %zd from error handler out of bounds", - newpos); - goto errorexit; - } - buf->inbuf = buf->inbuf_top + newpos; - Py_DECREF(retobj); - return 0; + PyObject *retobj = NULL, *retuni = NULL; + Py_ssize_t retunisize, newpos; + const char *reason; + Py_ssize_t esize, start, end; + + if (e > 0) { + reason = "illegal multibyte sequence"; + esize = e; + } + else { + switch (e) { + case MBERR_TOOSMALL: + REQUIRE_DECODEBUFFER(buf, -1); + return 0; /* retry it */ + case MBERR_TOOFEW: + reason = "incomplete multibyte sequence"; + esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); + break; + case MBERR_INTERNAL: + PyErr_SetString(PyExc_RuntimeError, + "internal codec error"); + return -1; + default: + PyErr_SetString(PyExc_RuntimeError, + "unknown runtime error"); + return -1; + } + } + + if (errors == ERROR_REPLACE) { + REQUIRE_DECODEBUFFER(buf, 1); + *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER; + } + if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { + buf->inbuf += esize; + return 0; + } + + start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); + end = start + esize; + + /* use cached exception object if available */ + if (buf->excobj == NULL) { + buf->excobj = PyUnicodeDecodeError_Create(codec->encoding, + (const char *)buf->inbuf_top, + (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top), + start, end, reason); + if (buf->excobj == NULL) + goto errorexit; + } + else + if (PyUnicodeDecodeError_SetStart(buf->excobj, start) || + PyUnicodeDecodeError_SetEnd(buf->excobj, end) || + PyUnicodeDecodeError_SetReason(buf->excobj, reason)) + goto errorexit; + + if (errors == ERROR_STRICT) { + PyCodec_StrictErrors(buf->excobj); + goto errorexit; + } + + retobj = call_error_callback(errors, buf->excobj); + if (retobj == NULL) + goto errorexit; + + if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || + !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || + !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) { + PyErr_SetString(PyExc_TypeError, + "decoding error handler must return " + "(unicode, int) tuple"); + goto errorexit; + } + + retunisize = PyUnicode_GET_SIZE(retuni); + if (retunisize > 0) { + REQUIRE_DECODEBUFFER(buf, retunisize); + memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni), + retunisize * Py_UNICODE_SIZE); + buf->outbuf += retunisize; + } + + newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); + if (newpos < 0 && !PyErr_Occurred()) + newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); + if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { + PyErr_Clear(); + PyErr_Format(PyExc_IndexError, + "position %zd from error handler out of bounds", + newpos); + goto errorexit; + } + buf->inbuf = buf->inbuf_top + newpos; + Py_DECREF(retobj); + return 0; errorexit: - Py_XDECREF(retobj); - return -1; + Py_XDECREF(retobj); + return -1; } static PyObject * multibytecodec_encode(MultibyteCodec *codec, - MultibyteCodec_State *state, - const Py_UNICODE **data, Py_ssize_t datalen, - PyObject *errors, int flags) + MultibyteCodec_State *state, + const Py_UNICODE **data, Py_ssize_t datalen, + PyObject *errors, int flags) { - MultibyteEncodeBuffer buf; - Py_ssize_t finalsize, r = 0; - - if (datalen == 0) - return PyBytes_FromStringAndSize(NULL, 0); - - buf.excobj = NULL; - buf.inbuf = buf.inbuf_top = *data; - buf.inbuf_end = buf.inbuf_top + datalen; - - if (datalen > (PY_SSIZE_T_MAX - 16) / 2) { - PyErr_NoMemory(); - goto errorexit; - } - - buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16); - if (buf.outobj == NULL) - goto errorexit; - buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj); - buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj); - - while (buf.inbuf < buf.inbuf_end) { - Py_ssize_t inleft, outleft; - - /* we don't reuse inleft and outleft here. - * error callbacks can relocate the cursor anywhere on buffer*/ - inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); - outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); - r = codec->encode(state, codec->config, &buf.inbuf, inleft, - &buf.outbuf, outleft, flags); - if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) - break; - else if (multibytecodec_encerror(codec, state, &buf, errors,r)) - goto errorexit; - else if (r == MBERR_TOOFEW) - break; - } - - if (codec->encreset != NULL) - for (;;) { - Py_ssize_t outleft; - - outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); - r = codec->encreset(state, codec->config, &buf.outbuf, - outleft); - if (r == 0) - break; - else if (multibytecodec_encerror(codec, state, - &buf, errors, r)) - goto errorexit; - } - - finalsize = (Py_ssize_t)((char *)buf.outbuf - - PyBytes_AS_STRING(buf.outobj)); - - if (finalsize != PyBytes_GET_SIZE(buf.outobj)) - if (_PyBytes_Resize(&buf.outobj, finalsize) == -1) - goto errorexit; - - *data = buf.inbuf; - Py_XDECREF(buf.excobj); - return buf.outobj; + MultibyteEncodeBuffer buf; + Py_ssize_t finalsize, r = 0; + + if (datalen == 0) + return PyBytes_FromStringAndSize(NULL, 0); + + buf.excobj = NULL; + buf.inbuf = buf.inbuf_top = *data; + buf.inbuf_end = buf.inbuf_top + datalen; + + if (datalen > (PY_SSIZE_T_MAX - 16) / 2) { + PyErr_NoMemory(); + goto errorexit; + } + + buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16); + if (buf.outobj == NULL) + goto errorexit; + buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj); + buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj); + + while (buf.inbuf < buf.inbuf_end) { + Py_ssize_t inleft, outleft; + + /* we don't reuse inleft and outleft here. + * error callbacks can relocate the cursor anywhere on buffer*/ + inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); + outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); + r = codec->encode(state, codec->config, &buf.inbuf, inleft, + &buf.outbuf, outleft, flags); + if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) + break; + else if (multibytecodec_encerror(codec, state, &buf, errors,r)) + goto errorexit; + else if (r == MBERR_TOOFEW) + break; + } + + if (codec->encreset != NULL) + for (;;) { + Py_ssize_t outleft; + + outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); + r = codec->encreset(state, codec->config, &buf.outbuf, + outleft); + if (r == 0) + break; + else if (multibytecodec_encerror(codec, state, + &buf, errors, r)) + goto errorexit; + } + + finalsize = (Py_ssize_t)((char *)buf.outbuf - + PyBytes_AS_STRING(buf.outobj)); + + if (finalsize != PyBytes_GET_SIZE(buf.outobj)) + if (_PyBytes_Resize(&buf.outobj, finalsize) == -1) + goto errorexit; + + *data = buf.inbuf; + Py_XDECREF(buf.excobj); + return buf.outobj; errorexit: - Py_XDECREF(buf.excobj); - Py_XDECREF(buf.outobj); - return NULL; + Py_XDECREF(buf.excobj); + Py_XDECREF(buf.outobj); + return NULL; } static PyObject * MultibyteCodec_Encode(MultibyteCodecObject *self, - PyObject *args, PyObject *kwargs) + PyObject *args, PyObject *kwargs) { - MultibyteCodec_State state; - Py_UNICODE *data; - PyObject *errorcb, *r, *arg, *ucvt; - const char *errors = NULL; - Py_ssize_t datalen; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode", - codeckwarglist, &arg, &errors)) - return NULL; - - if (PyUnicode_Check(arg)) - ucvt = NULL; - else { - arg = ucvt = PyObject_Str(arg); - if (arg == NULL) - return NULL; - else if (!PyUnicode_Check(arg)) { - PyErr_SetString(PyExc_TypeError, - "couldn't convert the object to unicode."); - Py_DECREF(ucvt); - return NULL; - } - } - - data = PyUnicode_AS_UNICODE(arg); - datalen = PyUnicode_GET_SIZE(arg); - - errorcb = internal_error_callback(errors); - if (errorcb == NULL) { - Py_XDECREF(ucvt); - return NULL; - } - - if (self->codec->encinit != NULL && - self->codec->encinit(&state, self->codec->config) != 0) - goto errorexit; - r = multibytecodec_encode(self->codec, &state, - (const Py_UNICODE **)&data, datalen, errorcb, - MBENC_FLUSH | MBENC_RESET); - if (r == NULL) - goto errorexit; - - ERROR_DECREF(errorcb); - Py_XDECREF(ucvt); - return make_tuple(r, datalen); + MultibyteCodec_State state; + Py_UNICODE *data; + PyObject *errorcb, *r, *arg, *ucvt; + const char *errors = NULL; + Py_ssize_t datalen; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode", + codeckwarglist, &arg, &errors)) + return NULL; + + if (PyUnicode_Check(arg)) + ucvt = NULL; + else { + arg = ucvt = PyObject_Str(arg); + if (arg == NULL) + return NULL; + else if (!PyUnicode_Check(arg)) { + PyErr_SetString(PyExc_TypeError, + "couldn't convert the object to unicode."); + Py_DECREF(ucvt); + return NULL; + } + } + + data = PyUnicode_AS_UNICODE(arg); + datalen = PyUnicode_GET_SIZE(arg); + + errorcb = internal_error_callback(errors); + if (errorcb == NULL) { + Py_XDECREF(ucvt); + return NULL; + } + + if (self->codec->encinit != NULL && + self->codec->encinit(&state, self->codec->config) != 0) + goto errorexit; + r = multibytecodec_encode(self->codec, &state, + (const Py_UNICODE **)&data, datalen, errorcb, + MBENC_FLUSH | MBENC_RESET); + if (r == NULL) + goto errorexit; + + ERROR_DECREF(errorcb); + Py_XDECREF(ucvt); + return make_tuple(r, datalen); errorexit: - ERROR_DECREF(errorcb); - Py_XDECREF(ucvt); - return NULL; + ERROR_DECREF(errorcb); + Py_XDECREF(ucvt); + return NULL; } static PyObject * MultibyteCodec_Decode(MultibyteCodecObject *self, - PyObject *args, PyObject *kwargs) + PyObject *args, PyObject *kwargs) { - MultibyteCodec_State state; - MultibyteDecodeBuffer buf; - PyObject *errorcb; - Py_buffer pdata; - const char *data, *errors = NULL; - Py_ssize_t datalen, finalsize; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode", - codeckwarglist, &pdata, &errors)) - return NULL; - data = pdata.buf; - datalen = pdata.len; - - errorcb = internal_error_callback(errors); - if (errorcb == NULL) { - PyBuffer_Release(&pdata); - return NULL; - } - - if (datalen == 0) { - PyBuffer_Release(&pdata); - ERROR_DECREF(errorcb); - return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0); - } - - buf.excobj = NULL; - buf.inbuf = buf.inbuf_top = (unsigned char *)data; - buf.inbuf_end = buf.inbuf_top + datalen; - buf.outobj = PyUnicode_FromUnicode(NULL, datalen); - if (buf.outobj == NULL) - goto errorexit; - buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); - buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); - - if (self->codec->decinit != NULL && - self->codec->decinit(&state, self->codec->config) != 0) - goto errorexit; - - while (buf.inbuf < buf.inbuf_end) { - Py_ssize_t inleft, outleft, r; - - inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); - outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); - - r = self->codec->decode(&state, self->codec->config, - &buf.inbuf, inleft, &buf.outbuf, outleft); - if (r == 0) - break; - else if (multibytecodec_decerror(self->codec, &state, - &buf, errorcb, r)) - goto errorexit; - } - - finalsize = (Py_ssize_t)(buf.outbuf - - PyUnicode_AS_UNICODE(buf.outobj)); - - if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) - if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) - goto errorexit; - - PyBuffer_Release(&pdata); - Py_XDECREF(buf.excobj); - ERROR_DECREF(errorcb); - return make_tuple(buf.outobj, datalen); + MultibyteCodec_State state; + MultibyteDecodeBuffer buf; + PyObject *errorcb; + Py_buffer pdata; + const char *data, *errors = NULL; + Py_ssize_t datalen, finalsize; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode", + codeckwarglist, &pdata, &errors)) + return NULL; + data = pdata.buf; + datalen = pdata.len; + + errorcb = internal_error_callback(errors); + if (errorcb == NULL) { + PyBuffer_Release(&pdata); + return NULL; + } + + if (datalen == 0) { + PyBuffer_Release(&pdata); + ERROR_DECREF(errorcb); + return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0); + } + + buf.excobj = NULL; + buf.inbuf = buf.inbuf_top = (unsigned char *)data; + buf.inbuf_end = buf.inbuf_top + datalen; + buf.outobj = PyUnicode_FromUnicode(NULL, datalen); + if (buf.outobj == NULL) + goto errorexit; + buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); + buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); + + if (self->codec->decinit != NULL && + self->codec->decinit(&state, self->codec->config) != 0) + goto errorexit; + + while (buf.inbuf < buf.inbuf_end) { + Py_ssize_t inleft, outleft, r; + + inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); + outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); + + r = self->codec->decode(&state, self->codec->config, + &buf.inbuf, inleft, &buf.outbuf, outleft); + if (r == 0) + break; + else if (multibytecodec_decerror(self->codec, &state, + &buf, errorcb, r)) + goto errorexit; + } + + finalsize = (Py_ssize_t)(buf.outbuf - + PyUnicode_AS_UNICODE(buf.outobj)); + + if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) + if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) + goto errorexit; + + PyBuffer_Release(&pdata); + Py_XDECREF(buf.excobj); + ERROR_DECREF(errorcb); + return make_tuple(buf.outobj, datalen); errorexit: - PyBuffer_Release(&pdata); - ERROR_DECREF(errorcb); - Py_XDECREF(buf.excobj); - Py_XDECREF(buf.outobj); + PyBuffer_Release(&pdata); + ERROR_DECREF(errorcb); + Py_XDECREF(buf.excobj); + Py_XDECREF(buf.outobj); - return NULL; + return NULL; } static struct PyMethodDef multibytecodec_methods[] = { - {"encode", (PyCFunction)MultibyteCodec_Encode, - METH_VARARGS | METH_KEYWORDS, - MultibyteCodec_Encode__doc__}, - {"decode", (PyCFunction)MultibyteCodec_Decode, - METH_VARARGS | METH_KEYWORDS, - MultibyteCodec_Decode__doc__}, - {NULL, NULL}, + {"encode", (PyCFunction)MultibyteCodec_Encode, + METH_VARARGS | METH_KEYWORDS, + MultibyteCodec_Encode__doc__}, + {"decode", (PyCFunction)MultibyteCodec_Decode, + METH_VARARGS | METH_KEYWORDS, + MultibyteCodec_Decode__doc__}, + {NULL, NULL}, }; static void multibytecodec_dealloc(MultibyteCodecObject *self) { - PyObject_Del(self); + PyObject_Del(self); } static PyTypeObject MultibyteCodec_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "MultibyteCodec", /* tp_name */ - sizeof(MultibyteCodecObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)multibytecodec_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iterext */ - multibytecodec_methods, /* tp_methods */ + PyVarObject_HEAD_INIT(NULL, 0) + "MultibyteCodec", /* tp_name */ + sizeof(MultibyteCodecObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)multibytecodec_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + multibytecodec_methods, /* tp_methods */ }; @@ -732,150 +732,150 @@ static PyTypeObject MultibyteCodec_Type = { * Utility functions for stateful codec mechanism */ -#define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o)) -#define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o)) +#define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o)) +#define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o)) static PyObject * encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, - PyObject *unistr, int final) + PyObject *unistr, int final) { - PyObject *ucvt, *r = NULL; - Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; - Py_ssize_t datalen, origpending; - - if (PyUnicode_Check(unistr)) - ucvt = NULL; - else { - unistr = ucvt = PyObject_Str(unistr); - if (unistr == NULL) - return NULL; - else if (!PyUnicode_Check(unistr)) { - PyErr_SetString(PyExc_TypeError, - "couldn't convert the object to unicode."); - Py_DECREF(ucvt); - return NULL; - } - } - - datalen = PyUnicode_GET_SIZE(unistr); - origpending = ctx->pendingsize; - - if (origpending > 0) { - if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) { - PyErr_NoMemory(); - /* inbuf_tmp == NULL */ - goto errorexit; - } - inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); - if (inbuf_tmp == NULL) - goto errorexit; - memcpy(inbuf_tmp, ctx->pending, - Py_UNICODE_SIZE * ctx->pendingsize); - memcpy(inbuf_tmp + ctx->pendingsize, - PyUnicode_AS_UNICODE(unistr), - Py_UNICODE_SIZE * datalen); - datalen += ctx->pendingsize; - ctx->pendingsize = 0; - inbuf = inbuf_tmp; - } - else - inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr); - - inbuf_end = inbuf + datalen; - - r = multibytecodec_encode(ctx->codec, &ctx->state, - (const Py_UNICODE **)&inbuf, - datalen, ctx->errors, final ? MBENC_FLUSH : 0); - if (r == NULL) { - /* recover the original pending buffer */ - if (origpending > 0) - memcpy(ctx->pending, inbuf_tmp, - Py_UNICODE_SIZE * origpending); - ctx->pendingsize = origpending; - goto errorexit; - } - - if (inbuf < inbuf_end) { - ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); - if (ctx->pendingsize > MAXENCPENDING) { - /* normal codecs can't reach here */ - ctx->pendingsize = 0; - PyErr_SetString(PyExc_UnicodeError, - "pending buffer overflow"); - goto errorexit; - } - memcpy(ctx->pending, inbuf, - ctx->pendingsize * Py_UNICODE_SIZE); - } - - if (inbuf_tmp != NULL) - PyMem_Del(inbuf_tmp); - Py_XDECREF(ucvt); - return r; + PyObject *ucvt, *r = NULL; + Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; + Py_ssize_t datalen, origpending; + + if (PyUnicode_Check(unistr)) + ucvt = NULL; + else { + unistr = ucvt = PyObject_Str(unistr); + if (unistr == NULL) + return NULL; + else if (!PyUnicode_Check(unistr)) { + PyErr_SetString(PyExc_TypeError, + "couldn't convert the object to unicode."); + Py_DECREF(ucvt); + return NULL; + } + } + + datalen = PyUnicode_GET_SIZE(unistr); + origpending = ctx->pendingsize; + + if (origpending > 0) { + if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) { + PyErr_NoMemory(); + /* inbuf_tmp == NULL */ + goto errorexit; + } + inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); + if (inbuf_tmp == NULL) + goto errorexit; + memcpy(inbuf_tmp, ctx->pending, + Py_UNICODE_SIZE * ctx->pendingsize); + memcpy(inbuf_tmp + ctx->pendingsize, + PyUnicode_AS_UNICODE(unistr), + Py_UNICODE_SIZE * datalen); + datalen += ctx->pendingsize; + ctx->pendingsize = 0; + inbuf = inbuf_tmp; + } + else + inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr); + + inbuf_end = inbuf + datalen; + + r = multibytecodec_encode(ctx->codec, &ctx->state, + (const Py_UNICODE **)&inbuf, + datalen, ctx->errors, final ? MBENC_FLUSH : 0); + if (r == NULL) { + /* recover the original pending buffer */ + if (origpending > 0) + memcpy(ctx->pending, inbuf_tmp, + Py_UNICODE_SIZE * origpending); + ctx->pendingsize = origpending; + goto errorexit; + } + + if (inbuf < inbuf_end) { + ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); + if (ctx->pendingsize > MAXENCPENDING) { + /* normal codecs can't reach here */ + ctx->pendingsize = 0; + PyErr_SetString(PyExc_UnicodeError, + "pending buffer overflow"); + goto errorexit; + } + memcpy(ctx->pending, inbuf, + ctx->pendingsize * Py_UNICODE_SIZE); + } + + if (inbuf_tmp != NULL) + PyMem_Del(inbuf_tmp); + Py_XDECREF(ucvt); + return r; errorexit: - if (inbuf_tmp != NULL) - PyMem_Del(inbuf_tmp); - Py_XDECREF(r); - Py_XDECREF(ucvt); - return NULL; + if (inbuf_tmp != NULL) + PyMem_Del(inbuf_tmp); + Py_XDECREF(r); + Py_XDECREF(ucvt); + return NULL; } static int decoder_append_pending(MultibyteStatefulDecoderContext *ctx, - MultibyteDecodeBuffer *buf) + MultibyteDecodeBuffer *buf) { - Py_ssize_t npendings; - - npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); - if (npendings + ctx->pendingsize > MAXDECPENDING || - npendings > PY_SSIZE_T_MAX - ctx->pendingsize) { - PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); - return -1; - } - memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); - ctx->pendingsize += npendings; - return 0; + Py_ssize_t npendings; + + npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); + if (npendings + ctx->pendingsize > MAXDECPENDING || + npendings > PY_SSIZE_T_MAX - ctx->pendingsize) { + PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); + return -1; + } + memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); + ctx->pendingsize += npendings; + return 0; } static int decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, - Py_ssize_t size) + Py_ssize_t size) { - buf->inbuf = buf->inbuf_top = (const unsigned char *)data; - buf->inbuf_end = buf->inbuf_top + size; - if (buf->outobj == NULL) { /* only if outobj is not allocated yet */ - buf->outobj = PyUnicode_FromUnicode(NULL, size); - if (buf->outobj == NULL) - return -1; - buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj); - buf->outbuf_end = buf->outbuf + - PyUnicode_GET_SIZE(buf->outobj); - } - - return 0; + buf->inbuf = buf->inbuf_top = (const unsigned char *)data; + buf->inbuf_end = buf->inbuf_top + size; + if (buf->outobj == NULL) { /* only if outobj is not allocated yet */ + buf->outobj = PyUnicode_FromUnicode(NULL, size); + if (buf->outobj == NULL) + return -1; + buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj); + buf->outbuf_end = buf->outbuf + + PyUnicode_GET_SIZE(buf->outobj); + } + + return 0; } static int decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx, - MultibyteDecodeBuffer *buf) + MultibyteDecodeBuffer *buf) { - while (buf->inbuf < buf->inbuf_end) { - Py_ssize_t inleft, outleft; - Py_ssize_t r; - - inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); - outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); - - r = ctx->codec->decode(&ctx->state, ctx->codec->config, - &buf->inbuf, inleft, &buf->outbuf, outleft); - if (r == 0 || r == MBERR_TOOFEW) - break; - else if (multibytecodec_decerror(ctx->codec, &ctx->state, - buf, ctx->errors, r)) - return -1; - } - return 0; + while (buf->inbuf < buf->inbuf_end) { + Py_ssize_t inleft, outleft; + Py_ssize_t r; + + inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); + outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); + + r = ctx->codec->decode(&ctx->state, ctx->codec->config, + &buf->inbuf, inleft, &buf->outbuf, outleft); + if (r == 0 || r == MBERR_TOOFEW) + break; + else if (multibytecodec_decerror(ctx->codec, &ctx->state, + buf, ctx->errors, r)) + return -1; + } + return 0; } @@ -885,142 +885,142 @@ decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx, static PyObject * mbiencoder_encode(MultibyteIncrementalEncoderObject *self, - PyObject *args, PyObject *kwargs) + PyObject *args, PyObject *kwargs) { - PyObject *data; - int final = 0; + PyObject *data; + int final = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode", - incrementalkwarglist, &data, &final)) - return NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode", + incrementalkwarglist, &data, &final)) + return NULL; - return encoder_encode_stateful(STATEFUL_ECTX(self), data, final); + return encoder_encode_stateful(STATEFUL_ECTX(self), data, final); } static PyObject * mbiencoder_reset(MultibyteIncrementalEncoderObject *self) { - if (self->codec->decreset != NULL && - self->codec->decreset(&self->state, self->codec->config) != 0) - return NULL; - self->pendingsize = 0; + if (self->codec->decreset != NULL && + self->codec->decreset(&self->state, self->codec->config) != 0) + return NULL; + self->pendingsize = 0; - Py_RETURN_NONE; + Py_RETURN_NONE; } static struct PyMethodDef mbiencoder_methods[] = { - {"encode", (PyCFunction)mbiencoder_encode, - METH_VARARGS | METH_KEYWORDS, NULL}, - {"reset", (PyCFunction)mbiencoder_reset, - METH_NOARGS, NULL}, - {NULL, NULL}, + {"encode", (PyCFunction)mbiencoder_encode, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"reset", (PyCFunction)mbiencoder_reset, + METH_NOARGS, NULL}, + {NULL, NULL}, }; static PyObject * mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - MultibyteIncrementalEncoderObject *self; - PyObject *codec = NULL; - char *errors = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder", - incnewkwarglist, &errors)) - return NULL; - - self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; - - codec = PyObject_GetAttrString((PyObject *)type, "codec"); - if (codec == NULL) - goto errorexit; - if (!MultibyteCodec_Check(codec)) { - PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); - goto errorexit; - } - - self->codec = ((MultibyteCodecObject *)codec)->codec; - self->pendingsize = 0; - self->errors = internal_error_callback(errors); - if (self->errors == NULL) - goto errorexit; - if (self->codec->encinit != NULL && - self->codec->encinit(&self->state, self->codec->config) != 0) - goto errorexit; - - Py_DECREF(codec); - return (PyObject *)self; + MultibyteIncrementalEncoderObject *self; + PyObject *codec = NULL; + char *errors = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder", + incnewkwarglist, &errors)) + return NULL; + + self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + codec = PyObject_GetAttrString((PyObject *)type, "codec"); + if (codec == NULL) + goto errorexit; + if (!MultibyteCodec_Check(codec)) { + PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); + goto errorexit; + } + + self->codec = ((MultibyteCodecObject *)codec)->codec; + self->pendingsize = 0; + self->errors = internal_error_callback(errors); + if (self->errors == NULL) + goto errorexit; + if (self->codec->encinit != NULL && + self->codec->encinit(&self->state, self->codec->config) != 0) + goto errorexit; + + Py_DECREF(codec); + return (PyObject *)self; errorexit: - Py_XDECREF(self); - Py_XDECREF(codec); - return NULL; + Py_XDECREF(self); + Py_XDECREF(codec); + return NULL; } static int mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds) { - return 0; + return 0; } static int mbiencoder_traverse(MultibyteIncrementalEncoderObject *self, - visitproc visit, void *arg) + visitproc visit, void *arg) { - if (ERROR_ISCUSTOM(self->errors)) - Py_VISIT(self->errors); - return 0; + if (ERROR_ISCUSTOM(self->errors)) + Py_VISIT(self->errors); + return 0; } static void mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self) { - PyObject_GC_UnTrack(self); - ERROR_DECREF(self->errors); - Py_TYPE(self)->tp_free(self); + PyObject_GC_UnTrack(self); + ERROR_DECREF(self->errors); + Py_TYPE(self)->tp_free(self); } static PyTypeObject MultibyteIncrementalEncoder_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "MultibyteIncrementalEncoder", /* tp_name */ - sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)mbiencoder_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC - | Py_TPFLAGS_BASETYPE, /* tp_flags */ - 0, /* tp_doc */ - (traverseproc)mbiencoder_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iterext */ - mbiencoder_methods, /* tp_methods */ - 0, /* tp_members */ - codecctx_getsets, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - mbiencoder_init, /* tp_init */ - 0, /* tp_alloc */ - mbiencoder_new, /* tp_new */ + PyVarObject_HEAD_INIT(NULL, 0) + "MultibyteIncrementalEncoder", /* tp_name */ + sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mbiencoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_BASETYPE, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)mbiencoder_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + mbiencoder_methods, /* tp_methods */ + 0, /* tp_members */ + codecctx_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + mbiencoder_init, /* tp_init */ + 0, /* tp_alloc */ + mbiencoder_new, /* tp_new */ }; @@ -1030,206 +1030,206 @@ static PyTypeObject MultibyteIncrementalEncoder_Type = { static PyObject * mbidecoder_decode(MultibyteIncrementalDecoderObject *self, - PyObject *args, PyObject *kwargs) + PyObject *args, PyObject *kwargs) { - MultibyteDecodeBuffer buf; - char *data, *wdata = NULL; - Py_buffer pdata; - Py_ssize_t wsize, finalsize = 0, size, origpending; - int final = 0; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode", - incrementalkwarglist, &pdata, &final)) - return NULL; - data = pdata.buf; - size = pdata.len; - - buf.outobj = buf.excobj = NULL; - origpending = self->pendingsize; - - if (self->pendingsize == 0) { - wsize = size; - wdata = data; - } - else { - if (size > PY_SSIZE_T_MAX - self->pendingsize) { - PyErr_NoMemory(); - goto errorexit; - } - wsize = size + self->pendingsize; - wdata = PyMem_Malloc(wsize); - if (wdata == NULL) - goto errorexit; - memcpy(wdata, self->pending, self->pendingsize); - memcpy(wdata + self->pendingsize, data, size); - self->pendingsize = 0; - } - - if (decoder_prepare_buffer(&buf, wdata, wsize) != 0) - goto errorexit; - - if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf)) - goto errorexit; - - if (final && buf.inbuf < buf.inbuf_end) { - if (multibytecodec_decerror(self->codec, &self->state, - &buf, self->errors, MBERR_TOOFEW)) { - /* recover the original pending buffer */ - memcpy(self->pending, wdata, origpending); - self->pendingsize = origpending; - goto errorexit; - } - } - - if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */ - if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0) - goto errorexit; - } - - finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); - if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) - if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) - goto errorexit; - - PyBuffer_Release(&pdata); - if (wdata != data) - PyMem_Del(wdata); - Py_XDECREF(buf.excobj); - return buf.outobj; + MultibyteDecodeBuffer buf; + char *data, *wdata = NULL; + Py_buffer pdata; + Py_ssize_t wsize, finalsize = 0, size, origpending; + int final = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode", + incrementalkwarglist, &pdata, &final)) + return NULL; + data = pdata.buf; + size = pdata.len; + + buf.outobj = buf.excobj = NULL; + origpending = self->pendingsize; + + if (self->pendingsize == 0) { + wsize = size; + wdata = data; + } + else { + if (size > PY_SSIZE_T_MAX - self->pendingsize) { + PyErr_NoMemory(); + goto errorexit; + } + wsize = size + self->pendingsize; + wdata = PyMem_Malloc(wsize); + if (wdata == NULL) + goto errorexit; + memcpy(wdata, self->pending, self->pendingsize); + memcpy(wdata + self->pendingsize, data, size); + self->pendingsize = 0; + } + + if (decoder_prepare_buffer(&buf, wdata, wsize) != 0) + goto errorexit; + + if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf)) + goto errorexit; + + if (final && buf.inbuf < buf.inbuf_end) { + if (multibytecodec_decerror(self->codec, &self->state, + &buf, self->errors, MBERR_TOOFEW)) { + /* recover the original pending buffer */ + memcpy(self->pending, wdata, origpending); + self->pendingsize = origpending; + goto errorexit; + } + } + + if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */ + if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0) + goto errorexit; + } + + finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); + if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) + if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) + goto errorexit; + + PyBuffer_Release(&pdata); + if (wdata != data) + PyMem_Del(wdata); + Py_XDECREF(buf.excobj); + return buf.outobj; errorexit: - PyBuffer_Release(&pdata); - if (wdata != NULL && wdata != data) - PyMem_Del(wdata); - Py_XDECREF(buf.excobj); - Py_XDECREF(buf.outobj); - return NULL; + PyBuffer_Release(&pdata); + if (wdata != NULL && wdata != data) + PyMem_Del(wdata); + Py_XDECREF(buf.excobj); + Py_XDECREF(buf.outobj); + return NULL; } static PyObject * mbidecoder_reset(MultibyteIncrementalDecoderObject *self) { - if (self->codec->decreset != NULL && - self->codec->decreset(&self->state, self->codec->config) != 0) - return NULL; - self->pendingsize = 0; + if (self->codec->decreset != NULL && + self->codec->decreset(&self->state, self->codec->config) != 0) + return NULL; + self->pendingsize = 0; - Py_RETURN_NONE; + Py_RETURN_NONE; } static struct PyMethodDef mbidecoder_methods[] = { - {"decode", (PyCFunction)mbidecoder_decode, - METH_VARARGS | METH_KEYWORDS, NULL}, - {"reset", (PyCFunction)mbidecoder_reset, - METH_NOARGS, NULL}, - {NULL, NULL}, + {"decode", (PyCFunction)mbidecoder_decode, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"reset", (PyCFunction)mbidecoder_reset, + METH_NOARGS, NULL}, + {NULL, NULL}, }; static PyObject * mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - MultibyteIncrementalDecoderObject *self; - PyObject *codec = NULL; - char *errors = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder", - incnewkwarglist, &errors)) - return NULL; - - self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; - - codec = PyObject_GetAttrString((PyObject *)type, "codec"); - if (codec == NULL) - goto errorexit; - if (!MultibyteCodec_Check(codec)) { - PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); - goto errorexit; - } - - self->codec = ((MultibyteCodecObject *)codec)->codec; - self->pendingsize = 0; - self->errors = internal_error_callback(errors); - if (self->errors == NULL) - goto errorexit; - if (self->codec->decinit != NULL && - self->codec->decinit(&self->state, self->codec->config) != 0) - goto errorexit; - - Py_DECREF(codec); - return (PyObject *)self; + MultibyteIncrementalDecoderObject *self; + PyObject *codec = NULL; + char *errors = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder", + incnewkwarglist, &errors)) + return NULL; + + self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + codec = PyObject_GetAttrString((PyObject *)type, "codec"); + if (codec == NULL) + goto errorexit; + if (!MultibyteCodec_Check(codec)) { + PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); + goto errorexit; + } + + self->codec = ((MultibyteCodecObject *)codec)->codec; + self->pendingsize = 0; + self->errors = internal_error_callback(errors); + if (self->errors == NULL) + goto errorexit; + if (self->codec->decinit != NULL && + self->codec->decinit(&self->state, self->codec->config) != 0) + goto errorexit; + + Py_DECREF(codec); + return (PyObject *)self; errorexit: - Py_XDECREF(self); - Py_XDECREF(codec); - return NULL; + Py_XDECREF(self); + Py_XDECREF(codec); + return NULL; } static int mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds) { - return 0; + return 0; } static int mbidecoder_traverse(MultibyteIncrementalDecoderObject *self, - visitproc visit, void *arg) + visitproc visit, void *arg) { - if (ERROR_ISCUSTOM(self->errors)) - Py_VISIT(self->errors); - return 0; + if (ERROR_ISCUSTOM(self->errors)) + Py_VISIT(self->errors); + return 0; } static void mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self) { - PyObject_GC_UnTrack(self); - ERROR_DECREF(self->errors); - Py_TYPE(self)->tp_free(self); + PyObject_GC_UnTrack(self); + ERROR_DECREF(self->errors); + Py_TYPE(self)->tp_free(self); } static PyTypeObject MultibyteIncrementalDecoder_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "MultibyteIncrementalDecoder", /* tp_name */ - sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)mbidecoder_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC - | Py_TPFLAGS_BASETYPE, /* tp_flags */ - 0, /* tp_doc */ - (traverseproc)mbidecoder_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iterext */ - mbidecoder_methods, /* tp_methods */ - 0, /* tp_members */ - codecctx_getsets, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - mbidecoder_init, /* tp_init */ - 0, /* tp_alloc */ - mbidecoder_new, /* tp_new */ + PyVarObject_HEAD_INIT(NULL, 0) + "MultibyteIncrementalDecoder", /* tp_name */ + sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mbidecoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_BASETYPE, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)mbidecoder_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + mbidecoder_methods, /* tp_methods */ + 0, /* tp_members */ + codecctx_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + mbidecoder_init, /* tp_init */ + 0, /* tp_alloc */ + mbidecoder_new, /* tp_new */ }; @@ -1239,327 +1239,327 @@ static PyTypeObject MultibyteIncrementalDecoder_Type = { static PyObject * mbstreamreader_iread(MultibyteStreamReaderObject *self, - const char *method, Py_ssize_t sizehint) + const char *method, Py_ssize_t sizehint) { - MultibyteDecodeBuffer buf; - PyObject *cres; - Py_ssize_t rsize, finalsize = 0; - - if (sizehint == 0) - return PyUnicode_FromUnicode(NULL, 0); - - buf.outobj = buf.excobj = NULL; - cres = NULL; - - for (;;) { - int endoffile; - - if (sizehint < 0) - cres = PyObject_CallMethod(self->stream, - (char *)method, NULL); - else - cres = PyObject_CallMethod(self->stream, - (char *)method, "i", sizehint); - if (cres == NULL) - goto errorexit; - - if (!PyBytes_Check(cres)) { - PyErr_Format(PyExc_TypeError, - "stream function returned a " - "non-bytes object (%.100s)", - cres->ob_type->tp_name); - goto errorexit; - } - - endoffile = (PyBytes_GET_SIZE(cres) == 0); - - if (self->pendingsize > 0) { - PyObject *ctr; - char *ctrdata; - - if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { - PyErr_NoMemory(); - goto errorexit; - } - rsize = PyBytes_GET_SIZE(cres) + self->pendingsize; - ctr = PyBytes_FromStringAndSize(NULL, rsize); - if (ctr == NULL) - goto errorexit; - ctrdata = PyBytes_AS_STRING(ctr); - memcpy(ctrdata, self->pending, self->pendingsize); - memcpy(ctrdata + self->pendingsize, - PyBytes_AS_STRING(cres), - PyBytes_GET_SIZE(cres)); - Py_DECREF(cres); - cres = ctr; - self->pendingsize = 0; - } - - rsize = PyBytes_GET_SIZE(cres); - if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres), - rsize) != 0) - goto errorexit; - - if (rsize > 0 && decoder_feed_buffer( - (MultibyteStatefulDecoderContext *)self, &buf)) - goto errorexit; - - if (endoffile || sizehint < 0) { - if (buf.inbuf < buf.inbuf_end && - multibytecodec_decerror(self->codec, &self->state, - &buf, self->errors, MBERR_TOOFEW)) - goto errorexit; - } - - if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ - if (decoder_append_pending(STATEFUL_DCTX(self), - &buf) != 0) - goto errorexit; - } - - finalsize = (Py_ssize_t)(buf.outbuf - - PyUnicode_AS_UNICODE(buf.outobj)); - Py_DECREF(cres); - cres = NULL; - - if (sizehint < 0 || finalsize != 0 || rsize == 0) - break; - - sizehint = 1; /* read 1 more byte and retry */ - } - - if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) - if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) - goto errorexit; - - Py_XDECREF(cres); - Py_XDECREF(buf.excobj); - return buf.outobj; + MultibyteDecodeBuffer buf; + PyObject *cres; + Py_ssize_t rsize, finalsize = 0; + + if (sizehint == 0) + return PyUnicode_FromUnicode(NULL, 0); + + buf.outobj = buf.excobj = NULL; + cres = NULL; + + for (;;) { + int endoffile; + + if (sizehint < 0) + cres = PyObject_CallMethod(self->stream, + (char *)method, NULL); + else + cres = PyObject_CallMethod(self->stream, + (char *)method, "i", sizehint); + if (cres == NULL) + goto errorexit; + + if (!PyBytes_Check(cres)) { + PyErr_Format(PyExc_TypeError, + "stream function returned a " + "non-bytes object (%.100s)", + cres->ob_type->tp_name); + goto errorexit; + } + + endoffile = (PyBytes_GET_SIZE(cres) == 0); + + if (self->pendingsize > 0) { + PyObject *ctr; + char *ctrdata; + + if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { + PyErr_NoMemory(); + goto errorexit; + } + rsize = PyBytes_GET_SIZE(cres) + self->pendingsize; + ctr = PyBytes_FromStringAndSize(NULL, rsize); + if (ctr == NULL) + goto errorexit; + ctrdata = PyBytes_AS_STRING(ctr); + memcpy(ctrdata, self->pending, self->pendingsize); + memcpy(ctrdata + self->pendingsize, + PyBytes_AS_STRING(cres), + PyBytes_GET_SIZE(cres)); + Py_DECREF(cres); + cres = ctr; + self->pendingsize = 0; + } + + rsize = PyBytes_GET_SIZE(cres); + if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres), + rsize) != 0) + goto errorexit; + + if (rsize > 0 && decoder_feed_buffer( + (MultibyteStatefulDecoderContext *)self, &buf)) + goto errorexit; + + if (endoffile || sizehint < 0) { + if (buf.inbuf < buf.inbuf_end && + multibytecodec_decerror(self->codec, &self->state, + &buf, self->errors, MBERR_TOOFEW)) + goto errorexit; + } + + if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ + if (decoder_append_pending(STATEFUL_DCTX(self), + &buf) != 0) + goto errorexit; + } + + finalsize = (Py_ssize_t)(buf.outbuf - + PyUnicode_AS_UNICODE(buf.outobj)); + Py_DECREF(cres); + cres = NULL; + + if (sizehint < 0 || finalsize != 0 || rsize == 0) + break; + + sizehint = 1; /* read 1 more byte and retry */ + } + + if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) + if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) + goto errorexit; + + Py_XDECREF(cres); + Py_XDECREF(buf.excobj); + return buf.outobj; errorexit: - Py_XDECREF(cres); - Py_XDECREF(buf.excobj); - Py_XDECREF(buf.outobj); - return NULL; + Py_XDECREF(cres); + Py_XDECREF(buf.excobj); + Py_XDECREF(buf.outobj); + return NULL; } static PyObject * mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args) { - PyObject *sizeobj = NULL; - Py_ssize_t size; + PyObject *sizeobj = NULL; + Py_ssize_t size; - if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj)) - return NULL; + if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj)) + return NULL; - if (sizeobj == Py_None || sizeobj == NULL) - size = -1; - else if (PyLong_Check(sizeobj)) - size = PyLong_AsSsize_t(sizeobj); - else { - PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); - return NULL; - } + if (sizeobj == Py_None || sizeobj == NULL) + size = -1; + else if (PyLong_Check(sizeobj)) + size = PyLong_AsSsize_t(sizeobj); + else { + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); + return NULL; + } - if (size == -1 && PyErr_Occurred()) - return NULL; + if (size == -1 && PyErr_Occurred()) + return NULL; - return mbstreamreader_iread(self, "read", size); + return mbstreamreader_iread(self, "read", size); } static PyObject * mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args) { - PyObject *sizeobj = NULL; - Py_ssize_t size; + PyObject *sizeobj = NULL; + Py_ssize_t size; - if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj)) - return NULL; + if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj)) + return NULL; - if (sizeobj == Py_None || sizeobj == NULL) - size = -1; - else if (PyLong_Check(sizeobj)) - size = PyLong_AsSsize_t(sizeobj); - else { - PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); - return NULL; - } + if (sizeobj == Py_None || sizeobj == NULL) + size = -1; + else if (PyLong_Check(sizeobj)) + size = PyLong_AsSsize_t(sizeobj); + else { + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); + return NULL; + } - if (size == -1 && PyErr_Occurred()) - return NULL; + if (size == -1 && PyErr_Occurred()) + return NULL; - return mbstreamreader_iread(self, "readline", size); + return mbstreamreader_iread(self, "readline", size); } static PyObject * mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args) { - PyObject *sizehintobj = NULL, *r, *sr; - Py_ssize_t sizehint; - - if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj)) - return NULL; - - if (sizehintobj == Py_None || sizehintobj == NULL) - sizehint = -1; - else if (PyLong_Check(sizehintobj)) - sizehint = PyLong_AsSsize_t(sizehintobj); - else { - PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); - return NULL; - } - - if (sizehint == -1 && PyErr_Occurred()) - return NULL; - - r = mbstreamreader_iread(self, "read", sizehint); - if (r == NULL) - return NULL; - - sr = PyUnicode_Splitlines(r, 1); - Py_DECREF(r); - return sr; + PyObject *sizehintobj = NULL, *r, *sr; + Py_ssize_t sizehint; + + if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj)) + return NULL; + + if (sizehintobj == Py_None || sizehintobj == NULL) + sizehint = -1; + else if (PyLong_Check(sizehintobj)) + sizehint = PyLong_AsSsize_t(sizehintobj); + else { + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); + return NULL; + } + + if (sizehint == -1 && PyErr_Occurred()) + return NULL; + + r = mbstreamreader_iread(self, "read", sizehint); + if (r == NULL) + return NULL; + + sr = PyUnicode_Splitlines(r, 1); + Py_DECREF(r); + return sr; } static PyObject * mbstreamreader_reset(MultibyteStreamReaderObject *self) { - if (self->codec->decreset != NULL && - self->codec->decreset(&self->state, self->codec->config) != 0) - return NULL; - self->pendingsize = 0; + if (self->codec->decreset != NULL && + self->codec->decreset(&self->state, self->codec->config) != 0) + return NULL; + self->pendingsize = 0; - Py_RETURN_NONE; + Py_RETURN_NONE; } static struct PyMethodDef mbstreamreader_methods[] = { - {"read", (PyCFunction)mbstreamreader_read, - METH_VARARGS, NULL}, - {"readline", (PyCFunction)mbstreamreader_readline, - METH_VARARGS, NULL}, - {"readlines", (PyCFunction)mbstreamreader_readlines, - METH_VARARGS, NULL}, - {"reset", (PyCFunction)mbstreamreader_reset, - METH_NOARGS, NULL}, - {NULL, NULL}, + {"read", (PyCFunction)mbstreamreader_read, + METH_VARARGS, NULL}, + {"readline", (PyCFunction)mbstreamreader_readline, + METH_VARARGS, NULL}, + {"readlines", (PyCFunction)mbstreamreader_readlines, + METH_VARARGS, NULL}, + {"reset", (PyCFunction)mbstreamreader_reset, + METH_NOARGS, NULL}, + {NULL, NULL}, }; static PyMemberDef mbstreamreader_members[] = { - {"stream", T_OBJECT, - offsetof(MultibyteStreamReaderObject, stream), - READONLY, NULL}, - {NULL,} + {"stream", T_OBJECT, + offsetof(MultibyteStreamReaderObject, stream), + READONLY, NULL}, + {NULL,} }; static PyObject * mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - MultibyteStreamReaderObject *self; - PyObject *stream, *codec = NULL; - char *errors = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader", - streamkwarglist, &stream, &errors)) - return NULL; - - self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; - - codec = PyObject_GetAttrString((PyObject *)type, "codec"); - if (codec == NULL) - goto errorexit; - if (!MultibyteCodec_Check(codec)) { - PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); - goto errorexit; - } - - self->codec = ((MultibyteCodecObject *)codec)->codec; - self->stream = stream; - Py_INCREF(stream); - self->pendingsize = 0; - self->errors = internal_error_callback(errors); - if (self->errors == NULL) - goto errorexit; - if (self->codec->decinit != NULL && - self->codec->decinit(&self->state, self->codec->config) != 0) - goto errorexit; - - Py_DECREF(codec); - return (PyObject *)self; + MultibyteStreamReaderObject *self; + PyObject *stream, *codec = NULL; + char *errors = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader", + streamkwarglist, &stream, &errors)) + return NULL; + + self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + codec = PyObject_GetAttrString((PyObject *)type, "codec"); + if (codec == NULL) + goto errorexit; + if (!MultibyteCodec_Check(codec)) { + PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); + goto errorexit; + } + + self->codec = ((MultibyteCodecObject *)codec)->codec; + self->stream = stream; + Py_INCREF(stream); + self->pendingsize = 0; + self->errors = internal_error_callback(errors); + if (self->errors == NULL) + goto errorexit; + if (self->codec->decinit != NULL && + self->codec->decinit(&self->state, self->codec->config) != 0) + goto errorexit; + + Py_DECREF(codec); + return (PyObject *)self; errorexit: - Py_XDECREF(self); - Py_XDECREF(codec); - return NULL; + Py_XDECREF(self); + Py_XDECREF(codec); + return NULL; } static int mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds) { - return 0; + return 0; } static int mbstreamreader_traverse(MultibyteStreamReaderObject *self, - visitproc visit, void *arg) + visitproc visit, void *arg) { - if (ERROR_ISCUSTOM(self->errors)) - Py_VISIT(self->errors); - Py_VISIT(self->stream); - return 0; + if (ERROR_ISCUSTOM(self->errors)) + Py_VISIT(self->errors); + Py_VISIT(self->stream); + return 0; } static void mbstreamreader_dealloc(MultibyteStreamReaderObject *self) { - PyObject_GC_UnTrack(self); - ERROR_DECREF(self->errors); - Py_XDECREF(self->stream); - Py_TYPE(self)->tp_free(self); + PyObject_GC_UnTrack(self); + ERROR_DECREF(self->errors); + Py_XDECREF(self->stream); + Py_TYPE(self)->tp_free(self); } static PyTypeObject MultibyteStreamReader_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "MultibyteStreamReader", /* tp_name */ - sizeof(MultibyteStreamReaderObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)mbstreamreader_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC - | Py_TPFLAGS_BASETYPE, /* tp_flags */ - 0, /* tp_doc */ - (traverseproc)mbstreamreader_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iterext */ - mbstreamreader_methods, /* tp_methods */ - mbstreamreader_members, /* tp_members */ - codecctx_getsets, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - mbstreamreader_init, /* tp_init */ - 0, /* tp_alloc */ - mbstreamreader_new, /* tp_new */ + PyVarObject_HEAD_INIT(NULL, 0) + "MultibyteStreamReader", /* tp_name */ + sizeof(MultibyteStreamReaderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mbstreamreader_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_BASETYPE, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)mbstreamreader_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + mbstreamreader_methods, /* tp_methods */ + mbstreamreader_members, /* tp_members */ + codecctx_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + mbstreamreader_init, /* tp_init */ + 0, /* tp_alloc */ + mbstreamreader_new, /* tp_new */ }; @@ -1569,217 +1569,217 @@ static PyTypeObject MultibyteStreamReader_Type = { static int mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, - PyObject *unistr) + PyObject *unistr) { - PyObject *str, *wr; + PyObject *str, *wr; - str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0); - if (str == NULL) - return -1; + str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0); + if (str == NULL) + return -1; - wr = PyObject_CallMethod(self->stream, "write", "O", str); - Py_DECREF(str); - if (wr == NULL) - return -1; + wr = PyObject_CallMethod(self->stream, "write", "O", str); + Py_DECREF(str); + if (wr == NULL) + return -1; - Py_DECREF(wr); - return 0; + Py_DECREF(wr); + return 0; } static PyObject * mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *strobj) { - if (mbstreamwriter_iwrite(self, strobj)) - return NULL; - else - Py_RETURN_NONE; + if (mbstreamwriter_iwrite(self, strobj)) + return NULL; + else + Py_RETURN_NONE; } static PyObject * mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines) { - PyObject *strobj; - int i, r; - - if (!PySequence_Check(lines)) { - PyErr_SetString(PyExc_TypeError, - "arg must be a sequence object"); - return NULL; - } - - for (i = 0; i < PySequence_Length(lines); i++) { - /* length can be changed even within this loop */ - strobj = PySequence_GetItem(lines, i); - if (strobj == NULL) - return NULL; - - r = mbstreamwriter_iwrite(self, strobj); - Py_DECREF(strobj); - if (r == -1) - return NULL; - } - - Py_RETURN_NONE; + PyObject *strobj; + int i, r; + + if (!PySequence_Check(lines)) { + PyErr_SetString(PyExc_TypeError, + "arg must be a sequence object"); + return NULL; + } + + for (i = 0; i < PySequence_Length(lines); i++) { + /* length can be changed even within this loop */ + strobj = PySequence_GetItem(lines, i); + if (strobj == NULL) + return NULL; + + r = mbstreamwriter_iwrite(self, strobj); + Py_DECREF(strobj); + if (r == -1) + return NULL; + } + + Py_RETURN_NONE; } static PyObject * mbstreamwriter_reset(MultibyteStreamWriterObject *self) { - const Py_UNICODE *pending; - PyObject *pwrt; - - pending = self->pending; - pwrt = multibytecodec_encode(self->codec, &self->state, - &pending, self->pendingsize, self->errors, - MBENC_FLUSH | MBENC_RESET); - /* some pending buffer can be truncated when UnicodeEncodeError is - * raised on 'strict' mode. but, 'reset' method is designed to - * reset the pending buffer or states so failed string sequence - * ought to be missed */ - self->pendingsize = 0; - if (pwrt == NULL) - return NULL; - - assert(PyBytes_Check(pwrt)); - if (PyBytes_Size(pwrt) > 0) { - PyObject *wr; - wr = PyObject_CallMethod(self->stream, "write", "O", pwrt); - if (wr == NULL) { - Py_DECREF(pwrt); - return NULL; - } - } - Py_DECREF(pwrt); - - Py_RETURN_NONE; + const Py_UNICODE *pending; + PyObject *pwrt; + + pending = self->pending; + pwrt = multibytecodec_encode(self->codec, &self->state, + &pending, self->pendingsize, self->errors, + MBENC_FLUSH | MBENC_RESET); + /* some pending buffer can be truncated when UnicodeEncodeError is + * raised on 'strict' mode. but, 'reset' method is designed to + * reset the pending buffer or states so failed string sequence + * ought to be missed */ + self->pendingsize = 0; + if (pwrt == NULL) + return NULL; + + assert(PyBytes_Check(pwrt)); + if (PyBytes_Size(pwrt) > 0) { + PyObject *wr; + wr = PyObject_CallMethod(self->stream, "write", "O", pwrt); + if (wr == NULL) { + Py_DECREF(pwrt); + return NULL; + } + } + Py_DECREF(pwrt); + + Py_RETURN_NONE; } static PyObject * mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - MultibyteStreamWriterObject *self; - PyObject *stream, *codec = NULL; - char *errors = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter", - streamkwarglist, &stream, &errors)) - return NULL; - - self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; - - codec = PyObject_GetAttrString((PyObject *)type, "codec"); - if (codec == NULL) - goto errorexit; - if (!MultibyteCodec_Check(codec)) { - PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); - goto errorexit; - } - - self->codec = ((MultibyteCodecObject *)codec)->codec; - self->stream = stream; - Py_INCREF(stream); - self->pendingsize = 0; - self->errors = internal_error_callback(errors); - if (self->errors == NULL) - goto errorexit; - if (self->codec->encinit != NULL && - self->codec->encinit(&self->state, self->codec->config) != 0) - goto errorexit; - - Py_DECREF(codec); - return (PyObject *)self; + MultibyteStreamWriterObject *self; + PyObject *stream, *codec = NULL; + char *errors = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter", + streamkwarglist, &stream, &errors)) + return NULL; + + self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + codec = PyObject_GetAttrString((PyObject *)type, "codec"); + if (codec == NULL) + goto errorexit; + if (!MultibyteCodec_Check(codec)) { + PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); + goto errorexit; + } + + self->codec = ((MultibyteCodecObject *)codec)->codec; + self->stream = stream; + Py_INCREF(stream); + self->pendingsize = 0; + self->errors = internal_error_callback(errors); + if (self->errors == NULL) + goto errorexit; + if (self->codec->encinit != NULL && + self->codec->encinit(&self->state, self->codec->config) != 0) + goto errorexit; + + Py_DECREF(codec); + return (PyObject *)self; errorexit: - Py_XDECREF(self); - Py_XDECREF(codec); - return NULL; + Py_XDECREF(self); + Py_XDECREF(codec); + return NULL; } static int mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds) { - return 0; + return 0; } static int mbstreamwriter_traverse(MultibyteStreamWriterObject *self, - visitproc visit, void *arg) + visitproc visit, void *arg) { - if (ERROR_ISCUSTOM(self->errors)) - Py_VISIT(self->errors); - Py_VISIT(self->stream); - return 0; + if (ERROR_ISCUSTOM(self->errors)) + Py_VISIT(self->errors); + Py_VISIT(self->stream); + return 0; } static void mbstreamwriter_dealloc(MultibyteStreamWriterObject *self) { - PyObject_GC_UnTrack(self); - ERROR_DECREF(self->errors); - Py_XDECREF(self->stream); - Py_TYPE(self)->tp_free(self); + PyObject_GC_UnTrack(self); + ERROR_DECREF(self->errors); + Py_XDECREF(self->stream); + Py_TYPE(self)->tp_free(self); } static struct PyMethodDef mbstreamwriter_methods[] = { - {"write", (PyCFunction)mbstreamwriter_write, - METH_O, NULL}, - {"writelines", (PyCFunction)mbstreamwriter_writelines, - METH_O, NULL}, - {"reset", (PyCFunction)mbstreamwriter_reset, - METH_NOARGS, NULL}, - {NULL, NULL}, + {"write", (PyCFunction)mbstreamwriter_write, + METH_O, NULL}, + {"writelines", (PyCFunction)mbstreamwriter_writelines, + METH_O, NULL}, + {"reset", (PyCFunction)mbstreamwriter_reset, + METH_NOARGS, NULL}, + {NULL, NULL}, }; static PyMemberDef mbstreamwriter_members[] = { - {"stream", T_OBJECT, - offsetof(MultibyteStreamWriterObject, stream), - READONLY, NULL}, - {NULL,} + {"stream", T_OBJECT, + offsetof(MultibyteStreamWriterObject, stream), + READONLY, NULL}, + {NULL,} }; static PyTypeObject MultibyteStreamWriter_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "MultibyteStreamWriter", /* tp_name */ - sizeof(MultibyteStreamWriterObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)mbstreamwriter_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC - | Py_TPFLAGS_BASETYPE, /* tp_flags */ - 0, /* tp_doc */ - (traverseproc)mbstreamwriter_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iterext */ - mbstreamwriter_methods, /* tp_methods */ - mbstreamwriter_members, /* tp_members */ - codecctx_getsets, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - mbstreamwriter_init, /* tp_init */ - 0, /* tp_alloc */ - mbstreamwriter_new, /* tp_new */ + PyVarObject_HEAD_INIT(NULL, 0) + "MultibyteStreamWriter", /* tp_name */ + sizeof(MultibyteStreamWriterObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mbstreamwriter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_BASETYPE, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)mbstreamwriter_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + mbstreamwriter_methods, /* tp_methods */ + mbstreamwriter_members, /* tp_members */ + codecctx_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + mbstreamwriter_init, /* tp_init */ + 0, /* tp_alloc */ + mbstreamwriter_new, /* tp_new */ }; @@ -1790,76 +1790,76 @@ static PyTypeObject MultibyteStreamWriter_Type = { static PyObject * __create_codec(PyObject *ignore, PyObject *arg) { - MultibyteCodecObject *self; - MultibyteCodec *codec; + MultibyteCodecObject *self; + MultibyteCodec *codec; - if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) { - PyErr_SetString(PyExc_ValueError, "argument type invalid"); - return NULL; - } + if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) { + PyErr_SetString(PyExc_ValueError, "argument type invalid"); + return NULL; + } - codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME); - if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0) - return NULL; + codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME); + if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0) + return NULL; - self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type); - if (self == NULL) - return NULL; - self->codec = codec; + self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type); + if (self == NULL) + return NULL; + self->codec = codec; - return (PyObject *)self; + return (PyObject *)self; } static struct PyMethodDef __methods[] = { - {"__create_codec", (PyCFunction)__create_codec, METH_O}, - {NULL, NULL}, + {"__create_codec", (PyCFunction)__create_codec, METH_O}, + {NULL, NULL}, }; static struct PyModuleDef _multibytecodecmodule = { - PyModuleDef_HEAD_INIT, - "_multibytecodec", - NULL, - -1, - __methods, - NULL, - NULL, - NULL, - NULL + PyModuleDef_HEAD_INIT, + "_multibytecodec", + NULL, + -1, + __methods, + NULL, + NULL, + NULL, + NULL }; PyMODINIT_FUNC PyInit__multibytecodec(void) { - int i; - PyObject *m; - PyTypeObject *typelist[] = { - &MultibyteIncrementalEncoder_Type, - &MultibyteIncrementalDecoder_Type, - &MultibyteStreamReader_Type, - &MultibyteStreamWriter_Type, - NULL - }; - - if (PyType_Ready(&MultibyteCodec_Type) < 0) - return NULL; - - m = PyModule_Create(&_multibytecodecmodule); - if (m == NULL) - return NULL; - - for (i = 0; typelist[i] != NULL; i++) { - if (PyType_Ready(typelist[i]) < 0) - return NULL; - Py_INCREF(typelist[i]); - PyModule_AddObject(m, typelist[i]->tp_name, - (PyObject *)typelist[i]); - } - - if (PyErr_Occurred()) { - Py_FatalError("can't initialize the _multibytecodec module"); - Py_DECREF(m); - m = NULL; - } - return m; + int i; + PyObject *m; + PyTypeObject *typelist[] = { + &MultibyteIncrementalEncoder_Type, + &MultibyteIncrementalDecoder_Type, + &MultibyteStreamReader_Type, + &MultibyteStreamWriter_Type, + NULL + }; + + if (PyType_Ready(&MultibyteCodec_Type) < 0) + return NULL; + + m = PyModule_Create(&_multibytecodecmodule); + if (m == NULL) + return NULL; + + for (i = 0; typelist[i] != NULL; i++) { + if (PyType_Ready(typelist[i]) < 0) + return NULL; + Py_INCREF(typelist[i]); + PyModule_AddObject(m, typelist[i]->tp_name, + (PyObject *)typelist[i]); + } + + if (PyErr_Occurred()) { + Py_FatalError("can't initialize the _multibytecodec module"); + Py_DECREF(m); + m = NULL; + } + return m; } diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h index 71c02cc..1b6ef55 100644 --- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -23,114 +23,114 @@ typedef unsigned short ucs2_t, DBCHAR; #endif typedef union { - void *p; - int i; - unsigned char c[8]; - ucs2_t u2[4]; - ucs4_t u4[2]; + void *p; + int i; + unsigned char c[8]; + ucs2_t u2[4]; + ucs4_t u4[2]; } MultibyteCodec_State; typedef int (*mbcodec_init)(const void *config); typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state, - const void *config, - const Py_UNICODE **inbuf, Py_ssize_t inleft, - unsigned char **outbuf, Py_ssize_t outleft, - int flags); + const void *config, + const Py_UNICODE **inbuf, Py_ssize_t inleft, + unsigned char **outbuf, Py_ssize_t outleft, + int flags); typedef int (*mbencodeinit_func)(MultibyteCodec_State *state, - const void *config); + const void *config); typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state, - const void *config, - unsigned char **outbuf, Py_ssize_t outleft); + const void *config, + unsigned char **outbuf, Py_ssize_t outleft); typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state, - const void *config, - const unsigned char **inbuf, Py_ssize_t inleft, - Py_UNICODE **outbuf, Py_ssize_t outleft); + const void *config, + const unsigned char **inbuf, Py_ssize_t inleft, + Py_UNICODE **outbuf, Py_ssize_t outleft); typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state, - const void *config); + const void *config); typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state, - const void *config); + const void *config); typedef struct { - const char *encoding; - const void *config; - mbcodec_init codecinit; - mbencode_func encode; - mbencodeinit_func encinit; - mbencodereset_func encreset; - mbdecode_func decode; - mbdecodeinit_func decinit; - mbdecodereset_func decreset; + const char *encoding; + const void *config; + mbcodec_init codecinit; + mbencode_func encode; + mbencodeinit_func encinit; + mbencodereset_func encreset; + mbdecode_func decode; + mbdecodeinit_func decinit; + mbdecodereset_func decreset; } MultibyteCodec; typedef struct { - PyObject_HEAD - MultibyteCodec *codec; + PyObject_HEAD + MultibyteCodec *codec; } MultibyteCodecObject; #define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type) -#define _MultibyteStatefulCodec_HEAD \ - PyObject_HEAD \ - MultibyteCodec *codec; \ - MultibyteCodec_State state; \ - PyObject *errors; +#define _MultibyteStatefulCodec_HEAD \ + PyObject_HEAD \ + MultibyteCodec *codec; \ + MultibyteCodec_State state; \ + PyObject *errors; typedef struct { - _MultibyteStatefulCodec_HEAD + _MultibyteStatefulCodec_HEAD } MultibyteStatefulCodecContext; -#define MAXENCPENDING 2 -#define _MultibyteStatefulEncoder_HEAD \ - _MultibyteStatefulCodec_HEAD \ - Py_UNICODE pending[MAXENCPENDING]; \ - Py_ssize_t pendingsize; +#define MAXENCPENDING 2 +#define _MultibyteStatefulEncoder_HEAD \ + _MultibyteStatefulCodec_HEAD \ + Py_UNICODE pending[MAXENCPENDING]; \ + Py_ssize_t pendingsize; typedef struct { - _MultibyteStatefulEncoder_HEAD + _MultibyteStatefulEncoder_HEAD } MultibyteStatefulEncoderContext; -#define MAXDECPENDING 8 -#define _MultibyteStatefulDecoder_HEAD \ - _MultibyteStatefulCodec_HEAD \ - unsigned char pending[MAXDECPENDING]; \ - Py_ssize_t pendingsize; +#define MAXDECPENDING 8 +#define _MultibyteStatefulDecoder_HEAD \ + _MultibyteStatefulCodec_HEAD \ + unsigned char pending[MAXDECPENDING]; \ + Py_ssize_t pendingsize; typedef struct { - _MultibyteStatefulDecoder_HEAD + _MultibyteStatefulDecoder_HEAD } MultibyteStatefulDecoderContext; typedef struct { - _MultibyteStatefulEncoder_HEAD + _MultibyteStatefulEncoder_HEAD } MultibyteIncrementalEncoderObject; typedef struct { - _MultibyteStatefulDecoder_HEAD + _MultibyteStatefulDecoder_HEAD } MultibyteIncrementalDecoderObject; typedef struct { - _MultibyteStatefulDecoder_HEAD - PyObject *stream; + _MultibyteStatefulDecoder_HEAD + PyObject *stream; } MultibyteStreamReaderObject; typedef struct { - _MultibyteStatefulEncoder_HEAD - PyObject *stream; + _MultibyteStatefulEncoder_HEAD + PyObject *stream; } MultibyteStreamWriterObject; /* positive values for illegal sequences */ -#define MBERR_TOOSMALL (-1) /* insufficient output buffer space */ -#define MBERR_TOOFEW (-2) /* incomplete input buffer */ -#define MBERR_INTERNAL (-3) /* internal runtime error */ - -#define ERROR_STRICT (PyObject *)(1) -#define ERROR_IGNORE (PyObject *)(2) -#define ERROR_REPLACE (PyObject *)(3) -#define ERROR_ISCUSTOM(p) ((p) < ERROR_STRICT || ERROR_REPLACE < (p)) -#define ERROR_DECREF(p) do { \ - if (p != NULL && ERROR_ISCUSTOM(p)) { \ - Py_DECREF(p); \ - } \ +#define MBERR_TOOSMALL (-1) /* insufficient output buffer space */ +#define MBERR_TOOFEW (-2) /* incomplete input buffer */ +#define MBERR_INTERNAL (-3) /* internal runtime error */ + +#define ERROR_STRICT (PyObject *)(1) +#define ERROR_IGNORE (PyObject *)(2) +#define ERROR_REPLACE (PyObject *)(3) +#define ERROR_ISCUSTOM(p) ((p) < ERROR_STRICT || ERROR_REPLACE < (p)) +#define ERROR_DECREF(p) do { \ + if (p != NULL && ERROR_ISCUSTOM(p)) { \ + Py_DECREF(p); \ + } \ } while (0); -#define MBENC_FLUSH 0x0001 /* encode all characters encodable */ -#define MBENC_MAX MBENC_FLUSH +#define MBENC_FLUSH 0x0001 /* encode all characters encodable */ +#define MBENC_MAX MBENC_FLUSH #define PyMultibyteCodec_CAPSULE_NAME "multibytecodec.__map_*" |