diff options
Diffstat (limited to 'Modules/cjkcodecs')
-rw-r--r-- | Modules/cjkcodecs/_codecs_cn.c | 210 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_hk.c | 72 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_iso2022.c | 283 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_jp.c | 325 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_kr.c | 152 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_tw.c | 60 | ||||
-rw-r--r-- | Modules/cjkcodecs/alg_jisx0201.h | 21 | ||||
-rw-r--r-- | Modules/cjkcodecs/cjkcodecs.h | 158 | ||||
-rw-r--r-- | Modules/cjkcodecs/emu_jisx0213_2000.h | 5 | ||||
-rw-r--r-- | Modules/cjkcodecs/mappings_cn.h | 2 | ||||
-rw-r--r-- | Modules/cjkcodecs/mappings_jisx0213_pair.h | 2 | ||||
-rw-r--r-- | Modules/cjkcodecs/multibytecodec.c | 334 | ||||
-rw-r--r-- | Modules/cjkcodecs/multibytecodec.h | 16 |
13 files changed, 797 insertions, 843 deletions
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index 9e9e96c..285da1e 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -23,12 +23,12 @@ * A844 undefined U+2015 HORIZONTAL BAR */ -#define GBK_DECODE(dc1, dc2, assi) \ - if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \ - else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \ - else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \ - else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \ - else TRYMAP_DEC(gbkext, assi, dc1, dc2); +#define GBK_DECODE(dc1, dc2, writer) \ + if ((dc1) == 0xa1 && (dc2) == 0xaa) OUTCHAR(0x2014); \ + else if ((dc1) == 0xa8 && (dc2) == 0x44) OUTCHAR(0x2015); \ + else if ((dc1) == 0xa1 && (dc2) == 0xa4) OUTCHAR(0x00b7); \ + else TRYMAP_DEC(gb2312, writer, dc1 ^ 0x80, dc2 ^ 0x80); \ + else TRYMAP_DEC(gbkext, writer, dc1, dc2); #define GBK_ENCODE(code, assi) \ if ((code) == 0x2014) (assi) = 0xa1aa; \ @@ -42,16 +42,18 @@ ENCODER(gb2312) { - while (inleft > 0) { - Py_UNICODE c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); continue; } - UCS4INVALID(c) + + if (c > 0xFFFF) + return 1; REQUIRE_OUTBUF(2) TRYMAP_ENC(gbcommon, code, c); @@ -60,9 +62,9 @@ ENCODER(gb2312) if (code & 0x8000) /* MSB set: GBK */ return 1; - OUT1((code >> 8) | 0x80) - OUT2((code & 0xFF) | 0x80) - NEXT(1, 2) + OUTBYTE1((code >> 8) | 0x80) + OUTBYTE2((code & 0xFF) | 0x80) + NEXT(1, 2); } return 0; @@ -73,17 +75,15 @@ DECODER(gb2312) while (inleft > 0) { unsigned char c = **inbuf; - REQUIRE_OUTBUF(1) - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } REQUIRE_INBUF(2) - TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) { - NEXT(2, 1) + TRYMAP_DEC(gb2312, writer, c ^ 0x80, INBYTE2 ^ 0x80) { + NEXT_IN(2); } else return 1; } @@ -98,28 +98,30 @@ DECODER(gb2312) ENCODER(gbk) { - while (inleft > 0) { - Py_UNICODE c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); continue; } - UCS4INVALID(c) + + if (c > 0xFFFF) + return 1; REQUIRE_OUTBUF(2) GBK_ENCODE(c, code) else return 1; - OUT1((code >> 8) | 0x80) + OUTBYTE1((code >> 8) | 0x80) if (code & 0x8000) - OUT2((code & 0xFF)) /* MSB set: GBK */ + OUTBYTE2((code & 0xFF)) /* MSB set: GBK */ else - OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ - NEXT(1, 2) + OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ + NEXT(1, 2); } return 0; @@ -128,22 +130,20 @@ ENCODER(gbk) DECODER(gbk) { while (inleft > 0) { - unsigned char c = IN1; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1; if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } REQUIRE_INBUF(2) - GBK_DECODE(c, IN2, **outbuf) + GBK_DECODE(c, INBYTE2, writer) else return 1; - NEXT(2, 1) + NEXT_IN(2); } return 0; @@ -156,41 +156,31 @@ DECODER(gbk) ENCODER(gb18030) { - while (inleft > 0) { - ucs4_t c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; if (c < 0x80) { - WRITE1(c) - NEXT(1, 1) + WRITEBYTE1(c) + NEXT(1, 1); continue; } - DECODE_SURROGATE(c) - if (c > 0x10FFFF) -#if Py_UNICODE_SIZE == 2 - return 2; /* surrogates pair */ -#else - return 1; -#endif - else if (c >= 0x10000) { - ucs4_t tc = c - 0x10000; + if (c >= 0x10000) { + Py_UCS4 tc = c - 0x10000; + assert (c <= 0x10FFFF); REQUIRE_OUTBUF(4) - OUT4((unsigned char)(tc % 10) + 0x30) + OUTBYTE4((unsigned char)(tc % 10) + 0x30) tc /= 10; - OUT3((unsigned char)(tc % 126) + 0x81) + OUTBYTE3((unsigned char)(tc % 126) + 0x81) tc /= 126; - OUT2((unsigned char)(tc % 10) + 0x30) + OUTBYTE2((unsigned char)(tc % 10) + 0x30) tc /= 10; - OUT1((unsigned char)(tc + 0x90)) + OUTBYTE1((unsigned char)(tc + 0x90)) -#if Py_UNICODE_SIZE == 2 - NEXT(2, 4) /* surrogates pair */ -#else - NEXT(1, 4) -#endif + NEXT(1, 4); continue; } @@ -208,20 +198,20 @@ ENCODER(gb18030) utrrange++) if (utrrange->first <= c && c <= utrrange->last) { - Py_UNICODE tc; + Py_UCS4 tc; tc = c - utrrange->first + utrrange->base; - OUT4((unsigned char)(tc % 10) + 0x30) + OUTBYTE4((unsigned char)(tc % 10) + 0x30) tc /= 10; - OUT3((unsigned char)(tc % 126) + 0x81) + OUTBYTE3((unsigned char)(tc % 126) + 0x81) tc /= 126; - OUT2((unsigned char)(tc % 10) + 0x30) + OUTBYTE2((unsigned char)(tc % 10) + 0x30) tc /= 10; - OUT1((unsigned char)tc + 0x81) + OUTBYTE1((unsigned char)tc + 0x81) - NEXT(1, 4) + NEXT(1, 4); break; } @@ -230,13 +220,13 @@ ENCODER(gb18030) continue; } - OUT1((code >> 8) | 0x80) + OUTBYTE1((code >> 8) | 0x80) if (code & 0x8000) - OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */ + OUTBYTE2((code & 0xFF)) /* MSB set: GBK or GB18030ext */ else - OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ + OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ - NEXT(1, 2) + NEXT(1, 2); } return 0; @@ -245,61 +235,59 @@ ENCODER(gb18030) DECODER(gb18030) { while (inleft > 0) { - unsigned char c = IN1, c2; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1, c2; if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } REQUIRE_INBUF(2) - c2 = IN2; + c2 = INBYTE2; if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ const struct _gb18030_to_unibmp_ranges *utr; unsigned char c3, c4; - ucs4_t lseq; + Py_UCS4 lseq; REQUIRE_INBUF(4) - c3 = IN3; - c4 = IN4; + c3 = INBYTE3; + c4 = INBYTE4; if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) return 1; c -= 0x81; c2 -= 0x30; c3 -= 0x81; c4 -= 0x30; if (c < 4) { /* U+0080 - U+FFFF */ - lseq = ((ucs4_t)c * 10 + c2) * 1260 + - (ucs4_t)c3 * 10 + c4; + lseq = ((Py_UCS4)c * 10 + c2) * 1260 + + (Py_UCS4)c3 * 10 + c4; if (lseq < 39420) { for (utr = gb18030_to_unibmp_ranges; lseq >= (utr + 1)->base; utr++) ; - OUT1(utr->first - utr->base + lseq) - NEXT(4, 1) + OUTCHAR(utr->first - utr->base + lseq); + NEXT_IN(4); continue; } } else if (c >= 15) { /* U+10000 - U+10FFFF */ - lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2) - * 1260 + (ucs4_t)c3 * 10 + c4; + lseq = 0x10000 + (((Py_UCS4)c-15) * 10 + c2) + * 1260 + (Py_UCS4)c3 * 10 + c4; if (lseq <= 0x10FFFF) { - WRITEUCS4(lseq); - NEXT_IN(4) + OUTCHAR(lseq); + NEXT_IN(4); continue; } } return 1; } - GBK_DECODE(c, c2, **outbuf) - else TRYMAP_DEC(gb18030ext, **outbuf, c, c2); + GBK_DECODE(c, c2, writer) + else TRYMAP_DEC(gb18030ext, writer, c, c2); else return 1; - NEXT(2, 1) + NEXT_IN(2); } return 0; @@ -319,33 +307,34 @@ ENCODER_INIT(hz) ENCODER_RESET(hz) { if (state->i != 0) { - WRITE2('~', '}') + WRITEBYTE2('~', '}') state->i = 0; - NEXT_OUT(2) + NEXT_OUT(2); } return 0; } ENCODER(hz) { - while (inleft > 0) { - Py_UNICODE c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; if (c < 0x80) { if (state->i == 0) { - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); } else { - WRITE3('~', '}', (unsigned char)c) - NEXT(1, 3) + WRITEBYTE3('~', '}', (unsigned char)c) + NEXT(1, 3); state->i = 0; } continue; } - UCS4INVALID(c) + if (c > 0xFFFF) + return 1; TRYMAP_ENC(gbcommon, code, c); else return 1; @@ -354,13 +343,13 @@ ENCODER(hz) return 1; if (state->i == 0) { - WRITE4('~', '{', code >> 8, code & 0xff) - NEXT(1, 4) + WRITEBYTE4('~', '{', code >> 8, code & 0xff) + NEXT(1, 4); state->i = 1; } else { - WRITE2(code >> 8, code & 0xff) - NEXT(1, 2) + WRITEBYTE2(code >> 8, code & 0xff) + NEXT(1, 2); } } @@ -382,15 +371,15 @@ DECODER_RESET(hz) DECODER(hz) { while (inleft > 0) { - unsigned char c = IN1; + unsigned char c = INBYTE1; if (c == '~') { - unsigned char c2 = IN2; + unsigned char c2 = INBYTE2; REQUIRE_INBUF(2) if (c2 == '~') { - WRITE1('~') - NEXT(2, 1) + OUTCHAR('~'); + NEXT_IN(2); continue; } else if (c2 == '{' && state->i == 0) @@ -401,7 +390,7 @@ DECODER(hz) ; /* line-continuation */ else return 1; - NEXT(2, 0); + NEXT_IN(2); continue; } @@ -409,14 +398,13 @@ DECODER(hz) return 1; if (state->i == 0) { /* ASCII mode */ - WRITE1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); } else { /* GB mode */ REQUIRE_INBUF(2) - REQUIRE_OUTBUF(1) - TRYMAP_DEC(gb2312, **outbuf, c, IN2) { - NEXT(2, 1) + TRYMAP_DEC(gb2312, writer, c, INBYTE2) { + NEXT_IN(2); } else return 1; diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index d3ad04b..fe5f597 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -38,35 +38,39 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5 ENCODER(big5hkscs) { - while (inleft > 0) { - ucs4_t c = **inbuf; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; Py_ssize_t insize; if (c < 0x80) { REQUIRE_OUTBUF(1) **outbuf = (unsigned char)c; - NEXT(1, 1) + NEXT(1, 1); continue; } - DECODE_SURROGATE(c) - insize = GET_INSIZE(c); - + insize = 1; REQUIRE_OUTBUF(2) if (c < 0x10000) { TRYMAP_ENC(big5hkscs_bmp, code, c) { if (code == MULTIC) { - if (inleft >= 2 && + Py_UCS4 c2; + if (inlen - *inpos >= 2) + c2 = INCHAR2; + else + c2 = 0; + + if (inlen - *inpos >= 2 && ((c & 0xffdf) == 0x00ca) && - (((*inbuf)[1] & 0xfff7) == 0x0304)) { + ((c2 & 0xfff7) == 0x0304)) { code = big5hkscs_pairenc_table[ ((c >> 4) | - ((*inbuf)[1] >> 3)) & 3]; + (c2 >> 3)) & 3]; insize = 2; } - else if (inleft < 2 && + else if (inlen - *inpos < 2 && !(flags & MBENC_FLUSH)) return MBERR_TOOFEW; else { @@ -89,9 +93,9 @@ ENCODER(big5hkscs) else return insize; - OUT1(code >> 8) - OUT2(code & 0xFF) - NEXT(insize, 2) + OUTBYTE1(code >> 8) + OUTBYTE2(code & 0xFF) + NEXT(insize, 2); } return 0; @@ -102,33 +106,31 @@ ENCODER(big5hkscs) DECODER(big5hkscs) { while (inleft > 0) { - unsigned char c = IN1; - ucs4_t decoded; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1; + Py_UCS4 decoded; if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } REQUIRE_INBUF(2) - if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) { - TRYMAP_DEC(big5, **outbuf, c, IN2) { - NEXT(2, 1) + if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) { + TRYMAP_DEC(big5, writer, c, INBYTE2) { + NEXT_IN(2); continue; } } - TRYMAP_DEC(big5hkscs, decoded, c, IN2) + TRYMAP_DEC_CHAR(big5hkscs, decoded, c, INBYTE2) { - int s = BH2S(c, IN2); + int s = BH2S(c, INBYTE2); const unsigned char *hintbase; assert(0x87 <= c && c <= 0xfe); - assert(0x40 <= IN2 && IN2 <= 0xfe); + assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe); if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { hintbase = big5hkscs_phint_0; @@ -146,25 +148,25 @@ DECODER(big5hkscs) return MBERR_INTERNAL; if (hintbase[s >> 3] & (1 << (s & 7))) { - WRITEUCS4(decoded | 0x20000) - NEXT_IN(2) + OUTCHAR(decoded | 0x20000); + NEXT_IN(2); } else { - OUT1(decoded) - NEXT(2, 1) + OUTCHAR(decoded); + NEXT_IN(2); } continue; } - switch ((c << 8) | IN2) { - case 0x8862: WRITE2(0x00ca, 0x0304); break; - case 0x8864: WRITE2(0x00ca, 0x030c); break; - case 0x88a3: WRITE2(0x00ea, 0x0304); break; - case 0x88a5: WRITE2(0x00ea, 0x030c); break; + switch ((c << 8) | INBYTE2) { + case 0x8862: OUTCHAR2(0x00ca, 0x0304); break; + case 0x8864: OUTCHAR2(0x00ca, 0x030c); break; + case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break; + case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break; default: return 1; } - NEXT(2, 2) /* all decoded codepoints are pairs, above. */ + NEXT_IN(2); /* all decoded codepoints are pairs, above. */ } return 0; diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index cbc1542..bb63835 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -102,8 +102,8 @@ /*-*- internal data structures -*-*/ typedef int (*iso2022_init_func)(void); -typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data); -typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length); +typedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data); +typedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length); struct iso2022_designation { unsigned char mark; @@ -141,13 +141,13 @@ ENCODER_INIT(iso2022) ENCODER_RESET(iso2022) { if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) - NEXT_OUT(1) + WRITEBYTE1(SI) + NEXT_OUT(1); STATE_CLEARFLAG(F_SHIFTED) } if (STATE_G0 != CHARSET_ASCII) { - WRITE3(ESC, '(', 'B') - NEXT_OUT(3) + WRITEBYTE3(ESC, '(', 'B') + NEXT_OUT(3); STATE_SETG0(CHARSET_ASCII) } return 0; @@ -155,30 +155,29 @@ ENCODER_RESET(iso2022) ENCODER(iso2022) { - while (inleft > 0) { + while (*inpos < inlen) { const struct iso2022_designation *dsg; DBCHAR encoded; - ucs4_t c = **inbuf; + Py_UCS4 c = INCHAR1; Py_ssize_t insize; if (c < 0x80) { if (STATE_G0 != CHARSET_ASCII) { - WRITE3(ESC, '(', 'B') + WRITEBYTE3(ESC, '(', 'B') STATE_SETG0(CHARSET_ASCII) - NEXT_OUT(3) + NEXT_OUT(3); } if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) + WRITEBYTE1(SI) STATE_CLEARFLAG(F_SHIFTED) - NEXT_OUT(1) + NEXT_OUT(1); } - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); continue; } - DECODE_SURROGATE(c) - insize = GET_INSIZE(c); + insize = 1; encoded = MAP_UNMAPPABLE; for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { @@ -187,24 +186,14 @@ ENCODER(iso2022) if (encoded == MAP_MULTIPLE_AVAIL) { /* this implementation won't work for pair * of non-bmp characters. */ - if (inleft < 2) { + if (inlen - *inpos < 2) { if (!(flags & MBENC_FLUSH)) return MBERR_TOOFEW; length = -1; } else length = 2; -#if Py_UNICODE_SIZE == 2 - if (length == 2) { - ucs4_t u4in[2]; - u4in[0] = (ucs4_t)IN1; - u4in[1] = (ucs4_t)IN2; - encoded = dsg->encoder(u4in, &length); - } else - encoded = dsg->encoder(&c, &length); -#else encoded = dsg->encoder(&c, &length); -#endif if (encoded != MAP_UNMAPPABLE) { insize = length; break; @@ -221,47 +210,47 @@ ENCODER(iso2022) switch (dsg->plane) { case 0: /* G0 */ if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) + WRITEBYTE1(SI) STATE_CLEARFLAG(F_SHIFTED) - NEXT_OUT(1) + NEXT_OUT(1); } if (STATE_G0 != dsg->mark) { if (dsg->width == 1) { - WRITE3(ESC, '(', ESCMARK(dsg->mark)) + WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark)) STATE_SETG0(dsg->mark) - NEXT_OUT(3) + NEXT_OUT(3); } else if (dsg->mark == CHARSET_JISX0208) { - WRITE3(ESC, '$', ESCMARK(dsg->mark)) + WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark)) STATE_SETG0(dsg->mark) - NEXT_OUT(3) + NEXT_OUT(3); } else { - WRITE4(ESC, '$', '(', + WRITEBYTE4(ESC, '$', '(', ESCMARK(dsg->mark)) STATE_SETG0(dsg->mark) - NEXT_OUT(4) + NEXT_OUT(4); } } break; case 1: /* G1 */ if (STATE_G1 != dsg->mark) { if (dsg->width == 1) { - WRITE3(ESC, ')', ESCMARK(dsg->mark)) + WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark)) STATE_SETG1(dsg->mark) - NEXT_OUT(3) + NEXT_OUT(3); } else { - WRITE4(ESC, '$', ')', + WRITEBYTE4(ESC, '$', ')', ESCMARK(dsg->mark)) STATE_SETG1(dsg->mark) - NEXT_OUT(4) + NEXT_OUT(4); } } if (!STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SO) + WRITEBYTE1(SO) STATE_SETFLAG(F_SHIFTED) - NEXT_OUT(1) + NEXT_OUT(1); } break; default: /* G2 and G3 is not supported: no encoding in @@ -270,14 +259,14 @@ ENCODER(iso2022) } if (dsg->width == 1) { - WRITE1((unsigned char)encoded) - NEXT_OUT(1) + WRITEBYTE1((unsigned char)encoded) + NEXT_OUT(1); } else { - WRITE2(encoded >> 8, encoded & 0xff) - NEXT_OUT(2) + WRITEBYTE2(encoded >> 8, encoded & 0xff) + NEXT_OUT(2); } - NEXT_IN(insize) + NEXT_INCHAR(insize); } return 0; @@ -323,26 +312,26 @@ iso2022processesc(const void *config, MultibyteCodec_State *state, switch (esclen) { case 3: - if (IN2 == '$') { - charset = IN3 | CHARSET_DBCS; + if (INBYTE2 == '$') { + charset = INBYTE3 | CHARSET_DBCS; designation = 0; } else { - charset = IN3; - if (IN2 == '(') designation = 0; - else if (IN2 == ')') designation = 1; - else if (CONFIG_ISSET(USE_G2) && IN2 == '.') + charset = INBYTE3; + if (INBYTE2 == '(') designation = 0; + else if (INBYTE2 == ')') designation = 1; + else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.') designation = 2; else return 3; } break; case 4: - if (IN2 != '$') + if (INBYTE2 != '$') return 4; - charset = IN4 | CHARSET_DBCS; - if (IN3 == '(') designation = 0; - else if (IN3 == ')') designation = 1; + charset = INBYTE4 | CHARSET_DBCS; + if (INBYTE3 == '(') designation = 0; + else if (INBYTE3 == ')') designation = 1; else return 4; break; case 6: /* designation with prefix */ @@ -376,45 +365,43 @@ iso2022processesc(const void *config, MultibyteCodec_State *state, return 0; } -#define ISO8859_7_DECODE(c, assi) \ - if ((c) < 0xa0) (assi) = (c); \ - else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \ - (assi) = (c); \ - else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \ - (0xbffffd77L & (1L << ((c)-0xb4))))) \ - (assi) = 0x02d0 + (c); \ - else if ((c) == 0xa1) (assi) = 0x2018; \ - else if ((c) == 0xa2) (assi) = 0x2019; \ - else if ((c) == 0xaf) (assi) = 0x2015; +#define ISO8859_7_DECODE(c, writer) \ + if ((c) < 0xa0) OUTCHAR(c); \ + else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \ + OUTCHAR(c); \ + else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \ + (0xbffffd77L & (1L << ((c)-0xb4))))) \ + OUTCHAR(0x02d0 + (c)); \ + else if ((c) == 0xa1) OUTCHAR(0x2018); \ + else if ((c) == 0xa2) OUTCHAR(0x2019); \ + else if ((c) == 0xaf) OUTCHAR(0x2015); static Py_ssize_t iso2022processg2(const void *config, MultibyteCodec_State *state, const unsigned char **inbuf, Py_ssize_t *inleft, - Py_UNICODE **outbuf, Py_ssize_t *outleft) + _PyUnicodeWriter *writer) { /* not written to use encoder, decoder functions because only few * encodings use G2 designations in CJKCodecs */ if (STATE_G2 == CHARSET_ISO8859_1) { - if (IN3 < 0x80) - OUT1(IN3 + 0x80) + if (INBYTE3 < 0x80) + OUTCHAR(INBYTE3 + 0x80); else return 3; } else if (STATE_G2 == CHARSET_ISO8859_7) { - ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf) + ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer) else return 3; } else if (STATE_G2 == CHARSET_ASCII) { - if (IN3 & 0x80) return 3; - else **outbuf = IN3; + if (INBYTE3 & 0x80) return 3; + else OUTCHAR(INBYTE3); } else return MBERR_INTERNAL; (*inbuf) += 3; *inleft -= 3; - (*outbuf) += 1; - *outleft -= 1; return 0; } @@ -423,14 +410,14 @@ DECODER(iso2022) const struct iso2022_designation *dsgcache = NULL; while (inleft > 0) { - unsigned char c = IN1; + unsigned char c = INBYTE1; Py_ssize_t err; if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { /* ESC throughout mode: * for non-iso2022 escape sequences */ - WRITE1(c) /* assume as ISO-8859-1 */ - NEXT(1, 1) + OUTCHAR(c); /* assume as ISO-8859-1 */ + NEXT_IN(1); if (IS_ESCEND(c)) { STATE_CLEARFLAG(F_ESCTHROUGHOUT) } @@ -440,41 +427,41 @@ DECODER(iso2022) switch (c) { case ESC: REQUIRE_INBUF(2) - if (IS_ISO2022ESC(IN2)) { + if (IS_ISO2022ESC(INBYTE2)) { err = iso2022processesc(config, state, inbuf, &inleft); if (err != 0) return err; } - else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */ + else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */ REQUIRE_INBUF(3) err = iso2022processg2(config, state, - inbuf, &inleft, outbuf, &outleft); + inbuf, &inleft, writer); if (err != 0) return err; } else { - WRITE1(ESC) + OUTCHAR(ESC); STATE_SETFLAG(F_ESCTHROUGHOUT) - NEXT(1, 1) + NEXT_IN(1); } break; case SI: if (CONFIG_ISSET(NO_SHIFT)) goto bypass; STATE_CLEARFLAG(F_SHIFTED) - NEXT_IN(1) + NEXT_IN(1); break; case SO: if (CONFIG_ISSET(NO_SHIFT)) goto bypass; STATE_SETFLAG(F_SHIFTED) - NEXT_IN(1) + NEXT_IN(1); break; case LF: STATE_CLEARFLAG(F_SHIFTED) - WRITE1(LF) - NEXT(1, 1) + OUTCHAR(LF); + NEXT_IN(1); break; default: if (c < 0x20) /* C0 */ @@ -484,7 +471,7 @@ DECODER(iso2022) else { const struct iso2022_designation *dsg; unsigned char charset; - ucs4_t decoded; + Py_UCS4 decoded; if (STATE_GETFLAG(F_SHIFTED)) charset = STATE_G1; @@ -492,8 +479,8 @@ DECODER(iso2022) charset = STATE_G0; if (charset == CHARSET_ASCII) { -bypass: WRITE1(c) - NEXT(1, 1) +bypass: OUTCHAR(c); + NEXT_IN(1); break; } @@ -518,17 +505,15 @@ bypass: WRITE1(c) return dsg->width; if (decoded < 0x10000) { - WRITE1(decoded) - NEXT_OUT(1) + OUTCHAR(decoded); } else if (decoded < 0x30000) { - WRITEUCS4(decoded) + OUTCHAR(decoded); } else { /* JIS X 0213 pairs */ - WRITE2(decoded >> 16, decoded & 0xffff) - NEXT_OUT(2) + OUTCHAR2(decoded >> 16, decoded & 0xffff); } - NEXT_IN(dsg->width) + NEXT_IN(dsg->width); } break; } @@ -577,18 +562,18 @@ ksx1001_init(void) return 0; } -static ucs4_t +static Py_UCS4 ksx1001_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(ksx1001, u, data[0], data[1]) + Py_UCS4 u; + TRYMAP_DEC_CHAR(ksx1001, u, data[0], data[1]) return u; else return MAP_UNMAPPABLE; } static DBCHAR -ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length) +ksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -613,20 +598,20 @@ jisx0208_init(void) return 0; } -static ucs4_t +static Py_UCS4 jisx0208_decoder(const unsigned char *data) { - ucs4_t u; + Py_UCS4 u; if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ return 0xff3c; - else TRYMAP_DEC(jisx0208, u, data[0], data[1]) + else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]) return u; else return MAP_UNMAPPABLE; } static DBCHAR -jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -654,18 +639,18 @@ jisx0212_init(void) return 0; } -static ucs4_t +static Py_UCS4 jisx0212_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(jisx0212, u, data[0], data[1]) + Py_UCS4 u; + TRYMAP_DEC_CHAR(jisx0212, u, data[0], data[1]) return u; else return MAP_UNMAPPABLE; } static DBCHAR -jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -705,30 +690,30 @@ jisx0213_init(void) } #define config ((void *)2000) -static ucs4_t +static Py_UCS4 jisx0213_2000_1_decoder(const unsigned char *data) { - ucs4_t u; + Py_UCS4 u; EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1]) else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ return 0xff3c; - else TRYMAP_DEC(jisx0208, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) + else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]); + else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]); + else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1]) u |= 0x20000; - else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); + else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]); else return MAP_UNMAPPABLE; return u; } -static ucs4_t +static Py_UCS4 jisx0213_2000_2_decoder(const unsigned char *data) { - ucs4_t u; - EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1]) - TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) + Py_UCS4 u; + EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1]) + TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]); + else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1]) u |= 0x20000; else return MAP_UNMAPPABLE; @@ -736,28 +721,28 @@ jisx0213_2000_2_decoder(const unsigned char *data) } #undef config -static ucs4_t +static Py_UCS4 jisx0213_2004_1_decoder(const unsigned char *data) { - ucs4_t u; + Py_UCS4 u; if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ return 0xff3c; - else TRYMAP_DEC(jisx0208, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) + else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]); + else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]); + else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1]) u |= 0x20000; - else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); + else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]); else return MAP_UNMAPPABLE; return u; } -static ucs4_t +static Py_UCS4 jisx0213_2004_2_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); - else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) + Py_UCS4 u; + TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]); + else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1]) u |= 0x20000; else return MAP_UNMAPPABLE; @@ -765,7 +750,7 @@ jisx0213_2004_2_decoder(const unsigned char *data) } static DBCHAR -jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) +jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config) { DBCHAR coded; @@ -819,7 +804,7 @@ jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) } static DBCHAR -jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -831,7 +816,7 @@ jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length) } static DBCHAR -jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; Py_ssize_t ilength = *length; @@ -854,7 +839,7 @@ jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) } static DBCHAR -jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -866,7 +851,7 @@ jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length) } static DBCHAR -jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, NULL); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -878,7 +863,7 @@ jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length) } static DBCHAR -jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; Py_ssize_t ilength = *length; @@ -901,7 +886,7 @@ jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) } static DBCHAR -jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, NULL); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -912,17 +897,17 @@ jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length) return MAP_UNMAPPABLE; } -static ucs4_t +static Py_UCS4 jisx0201_r_decoder(const unsigned char *data) { - ucs4_t u; - JISX0201_R_DECODE(*data, u) + Py_UCS4 u; + JISX0201_R_DECODE_CHAR(*data, u) else return MAP_UNMAPPABLE; return u; } static DBCHAR -jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; JISX0201_R_ENCODE(*data, coded) @@ -930,17 +915,17 @@ jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length) return coded; } -static ucs4_t +static Py_UCS4 jisx0201_k_decoder(const unsigned char *data) { - ucs4_t u; - JISX0201_K_DECODE(*data ^ 0x80, u) + Py_UCS4 u; + JISX0201_K_DECODE_CHAR(*data ^ 0x80, u) else return MAP_UNMAPPABLE; return u; } static DBCHAR -jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; JISX0201_K_ENCODE(*data, coded) @@ -961,18 +946,18 @@ gb2312_init(void) return 0; } -static ucs4_t +static Py_UCS4 gb2312_decoder(const unsigned char *data) { - ucs4_t u; - TRYMAP_DEC(gb2312, u, data[0], data[1]) + Py_UCS4 u; + TRYMAP_DEC_CHAR(gb2312, u, data[0], data[1]) return u; else return MAP_UNMAPPABLE; } static DBCHAR -gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) +gb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -986,14 +971,14 @@ gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) } -static ucs4_t +static Py_UCS4 dummy_decoder(const unsigned char *data) { return MAP_UNMAPPABLE; } static DBCHAR -dummy_encoder(const ucs4_t *data, Py_ssize_t *length) +dummy_encoder(const Py_UCS4 *data, Py_ssize_t *length) { return MAP_UNMAPPABLE; } diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index a500696..7ab318b 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -19,38 +19,39 @@ ENCODER(cp932) { - while (inleft > 0) { - Py_UNICODE c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; unsigned char c1, c2; if (c <= 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); continue; } else if (c >= 0xff61 && c <= 0xff9f) { - WRITE1(c - 0xfec0) - NEXT(1, 1) + WRITEBYTE1(c - 0xfec0) + NEXT(1, 1); continue; } else if (c >= 0xf8f0 && c <= 0xf8f3) { /* Windows compatibility */ REQUIRE_OUTBUF(1) if (c == 0xf8f0) - OUT1(0xa0) + OUTBYTE1(0xa0) else - OUT1(c - 0xfef1 + 0xfd) - NEXT(1, 1) + OUTBYTE1(c - 0xfef1 + 0xfd) + NEXT(1, 1); continue; } - UCS4INVALID(c) + if (c > 0xFFFF) + return 1; REQUIRE_OUTBUF(2) TRYMAP_ENC(cp932ext, code, c) { - OUT1(code >> 8) - OUT2(code & 0xff) + OUTBYTE1(code >> 8) + OUTBYTE2(code & 0xff) } else TRYMAP_ENC(jisxcommon, code, c) { if (code & 0x8000) /* MSB set: JIS X 0212 */ @@ -61,20 +62,20 @@ ENCODER(cp932) c2 = code & 0xff; c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); c1 = (c1 - 0x21) >> 1; - OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) - OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) + OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) + OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) } else if (c >= 0xe000 && c < 0xe758) { /* User-defined area */ - c1 = (Py_UNICODE)(c - 0xe000) / 188; - c2 = (Py_UNICODE)(c - 0xe000) % 188; - OUT1(c1 + 0xf0) - OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) + c1 = (Py_UCS4)(c - 0xe000) / 188; + c2 = (Py_UCS4)(c - 0xe000) % 188; + OUTBYTE1(c1 + 0xf0) + OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) } else return 1; - NEXT(1, 2) + NEXT(1, 2); } return 0; @@ -83,33 +84,32 @@ ENCODER(cp932) DECODER(cp932) { while (inleft > 0) { - unsigned char c = IN1, c2; + unsigned char c = INBYTE1, c2; - REQUIRE_OUTBUF(1) if (c <= 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } else if (c >= 0xa0 && c <= 0xdf) { if (c == 0xa0) - OUT1(0xf8f0) /* half-width katakana */ + OUTCHAR(0xf8f0); /* half-width katakana */ else - OUT1(0xfec0 + c) - NEXT(1, 1) + OUTCHAR(0xfec0 + c); + NEXT_IN(1); continue; } else if (c >= 0xfd/* && c <= 0xff*/) { /* Windows compatibility */ - OUT1(0xf8f1 - 0xfd + c) - NEXT(1, 1) + OUTCHAR(0xf8f1 - 0xfd + c); + NEXT_IN(1); continue; } REQUIRE_INBUF(2) - c2 = IN2; + c2 = INBYTE2; - TRYMAP_DEC(cp932ext, **outbuf, c, c2); + TRYMAP_DEC(cp932ext, writer, c, c2); else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) return 1; @@ -119,21 +119,21 @@ DECODER(cp932) c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21); c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; - TRYMAP_DEC(jisx0208, **outbuf, c, c2); + TRYMAP_DEC(jisx0208, writer, c, c2); else return 1; } else if (c >= 0xf0 && c <= 0xf9) { if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc)) - OUT1(0xe000 + 188 * (c - 0xf0) + - (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) + OUTCHAR(0xe000 + 188 * (c - 0xf0) + + (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)); else return 1; } else return 1; - NEXT(2, 1) + NEXT_IN(2); } return 0; @@ -146,25 +146,24 @@ DECODER(cp932) ENCODER(euc_jis_2004) { - while (inleft > 0) { - ucs4_t c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; Py_ssize_t insize; if (c < 0x80) { - WRITE1(c) - NEXT(1, 1) + WRITEBYTE1(c) + NEXT(1, 1); continue; } - DECODE_SURROGATE(c) - insize = GET_INSIZE(c); + insize = 1; if (c <= 0xFFFF) { EMULATE_JISX0213_2000_ENCODE_BMP(code, c) else TRYMAP_ENC(jisx0213_bmp, code, c) { if (code == MULTIC) { - if (inleft < 2) { + if (inlen - *inpos < 2) { if (flags & MBENC_FLUSH) { code = find_pairencmap( (ucs2_t)c, 0, @@ -177,8 +176,9 @@ ENCODER(euc_jis_2004) return MBERR_TOOFEW; } else { + Py_UCS4 c2 = INCHAR2; code = find_pairencmap( - (ucs2_t)c, (*inbuf)[1], + (ucs2_t)c, c2, jisx0213_pair_encmap, JISX0213_ENCPAIRS); if (code == DBCINV) { @@ -196,8 +196,8 @@ ENCODER(euc_jis_2004) else TRYMAP_ENC(jisxcommon, code, c); else if (c >= 0xff61 && c <= 0xff9f) { /* JIS X 0201 half-width katakana */ - WRITE2(0x8e, c - 0xfec0) - NEXT(1, 2) + WRITEBYTE2(0x8e, c - 0xfec0) + NEXT(1, 2); continue; } else if (c == 0xff3c) @@ -219,12 +219,12 @@ ENCODER(euc_jis_2004) if (code & 0x8000) { /* Codeset 2 */ - WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) - NEXT(insize, 3) + WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80) + NEXT(insize, 3); } else { /* Codeset 1 */ - WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) - NEXT(insize, 2) + WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) + NEXT(insize, 2); } } @@ -234,14 +234,12 @@ ENCODER(euc_jis_2004) DECODER(euc_jis_2004) { while (inleft > 0) { - unsigned char c = IN1; - ucs4_t code; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1; + Py_UCS4 code; if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } @@ -250,10 +248,10 @@ DECODER(euc_jis_2004) unsigned char c2; REQUIRE_INBUF(2) - c2 = IN2; + c2 = INBYTE2; if (c2 >= 0xa1 && c2 <= 0xdf) { - OUT1(0xfec0 + c2) - NEXT(2, 1) + OUTCHAR(0xfec0 + c2); + NEXT_IN(2); } else return 1; @@ -262,46 +260,46 @@ DECODER(euc_jis_2004) unsigned char c2, c3; REQUIRE_INBUF(3) - c2 = IN2 ^ 0x80; - c3 = IN3 ^ 0x80; + c2 = INBYTE2 ^ 0x80; + c3 = INBYTE3 ^ 0x80; /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */ - EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3) - else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ; - else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) { - WRITEUCS4(EMPBASE | code) - NEXT_IN(3) + EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3) + else TRYMAP_DEC(jisx0213_2_bmp, writer, c2, c3) ; + else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c2, c3) { + OUTCHAR(EMPBASE | code); + NEXT_IN(3); continue; } - else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; + else TRYMAP_DEC(jisx0212, writer, c2, c3) ; else return 1; - NEXT(3, 1) + NEXT_IN(3); } else { unsigned char c2; REQUIRE_INBUF(2) c ^= 0x80; - c2 = IN2 ^ 0x80; + c2 = INBYTE2 ^ 0x80; /* JIS X 0213 Plane 1 */ - EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2) - else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c; - else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e; - else TRYMAP_DEC(jisx0208, **outbuf, c, c2); - else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); - else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { - WRITEUCS4(EMPBASE | code) - NEXT_IN(2) + EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2) + else if (c == 0x21 && c2 == 0x40) OUTCHAR(0xff3c); + else if (c == 0x22 && c2 == 0x32) OUTCHAR(0xff5e); + else TRYMAP_DEC(jisx0208, writer, c, c2); + else TRYMAP_DEC(jisx0213_1_bmp, writer, c, c2); + else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c, c2) { + OUTCHAR(EMPBASE | code); + NEXT_IN(2); continue; } - else TRYMAP_DEC(jisx0213_pair, code, c, c2) { - WRITE2(code >> 16, code & 0xffff) - NEXT(2, 2) + else TRYMAP_DEC_CHAR(jisx0213_pair, code, c, c2) { + OUTCHAR2(code >> 16, code & 0xffff); + NEXT_IN(2); continue; } else return 1; - NEXT(2, 1) + NEXT_IN(2); } } @@ -315,35 +313,36 @@ DECODER(euc_jis_2004) ENCODER(euc_jp) { - while (inleft > 0) { - Py_UNICODE c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); continue; } - UCS4INVALID(c) + if (c > 0xFFFF) + return 1; TRYMAP_ENC(jisxcommon, code, c); else if (c >= 0xff61 && c <= 0xff9f) { /* JIS X 0201 half-width katakana */ - WRITE2(0x8e, c - 0xfec0) - NEXT(1, 2) + WRITEBYTE2(0x8e, c - 0xfec0) + NEXT(1, 2); continue; } #ifndef STRICT_BUILD else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */ code = 0x2140; else if (c == 0xa5) { /* YEN SIGN */ - WRITE1(0x5c); - NEXT(1, 1) + WRITEBYTE1(0x5c); + NEXT(1, 1); continue; } else if (c == 0x203e) { /* OVERLINE */ - WRITE1(0x7e); - NEXT(1, 1) + WRITEBYTE1(0x7e); + NEXT(1, 1); continue; } #endif @@ -352,12 +351,12 @@ ENCODER(euc_jp) if (code & 0x8000) { /* JIS X 0212 */ - WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) - NEXT(1, 3) + WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80) + NEXT(1, 3); } else { /* JIS X 0208 */ - WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) - NEXT(1, 2) + WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) + NEXT(1, 2); } } @@ -367,13 +366,11 @@ ENCODER(euc_jp) DECODER(euc_jp) { while (inleft > 0) { - unsigned char c = IN1; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1; if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } @@ -382,10 +379,10 @@ DECODER(euc_jp) unsigned char c2; REQUIRE_INBUF(2) - c2 = IN2; + c2 = INBYTE2; if (c2 >= 0xa1 && c2 <= 0xdf) { - OUT1(0xfec0 + c2) - NEXT(2, 1) + OUTCHAR(0xfec0 + c2); + NEXT_IN(2); } else return 1; @@ -394,11 +391,11 @@ DECODER(euc_jp) unsigned char c2, c3; REQUIRE_INBUF(3) - c2 = IN2; - c3 = IN3; + c2 = INBYTE2; + c3 = INBYTE3; /* JIS X 0212 */ - TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) { - NEXT(3, 1) + TRYMAP_DEC(jisx0212, writer, c2 ^ 0x80, c3 ^ 0x80) { + NEXT_IN(3); } else return 1; @@ -407,18 +404,18 @@ DECODER(euc_jp) unsigned char c2; REQUIRE_INBUF(2) - c2 = IN2; + c2 = INBYTE2; /* JIS X 0208 */ #ifndef STRICT_BUILD if (c == 0xa1 && c2 == 0xc0) /* FULL-WIDTH REVERSE SOLIDUS */ - **outbuf = 0xff3c; + OUTCHAR(0xff3c); else #endif - TRYMAP_DEC(jisx0208, **outbuf, + TRYMAP_DEC(jisx0208, writer, c ^ 0x80, c2 ^ 0x80) ; else return 1; - NEXT(2, 1) + NEXT_IN(2); } } @@ -432,8 +429,8 @@ DECODER(euc_jp) ENCODER(shift_jis) { - while (inleft > 0) { - Py_UNICODE c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; unsigned char c1, c2; @@ -445,14 +442,16 @@ ENCODER(shift_jis) else if (c == 0x203e) code = 0x7e; /* OVERLINE */ #endif else JISX0201_K_ENCODE(c, code) - else UCS4INVALID(c) - else code = NOCHAR; + else if (c > 0xFFFF) + return 1; + else + code = NOCHAR; if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { REQUIRE_OUTBUF(1) - OUT1((unsigned char)code) - NEXT(1, 1) + OUTBYTE1((unsigned char)code) + NEXT(1, 1); continue; } @@ -475,9 +474,9 @@ ENCODER(shift_jis) c2 = code & 0xff; c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); c1 = (c1 - 0x21) >> 1; - OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) - OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) - NEXT(1, 2) + OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) + OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) + NEXT(1, 2); } return 0; @@ -486,21 +485,19 @@ ENCODER(shift_jis) DECODER(shift_jis) { while (inleft > 0) { - unsigned char c = IN1; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1; #ifdef STRICT_BUILD - JISX0201_R_DECODE(c, **outbuf) + JISX0201_R_DECODE(c, writer) #else - if (c < 0x80) **outbuf = c; + if (c < 0x80) OUTCHAR(c); #endif - else JISX0201_K_DECODE(c, **outbuf) + else JISX0201_K_DECODE(c, writer) else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ unsigned char c1, c2; REQUIRE_INBUF(2) - c2 = IN2; + c2 = INBYTE2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) return 1; @@ -512,13 +509,13 @@ DECODER(shift_jis) #ifndef STRICT_BUILD if (c1 == 0x21 && c2 == 0x40) { /* FULL-WIDTH REVERSE SOLIDUS */ - OUT1(0xff3c) - NEXT(2, 1) + OUTCHAR(0xff3c); + NEXT_IN(2); continue; } #endif - TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { - NEXT(2, 1) + TRYMAP_DEC(jisx0208, writer, c1, c2) { + NEXT_IN(2); continue; } else @@ -527,7 +524,7 @@ DECODER(shift_jis) else return 1; - NEXT(1, 1) /* JIS X 0201 */ + NEXT_IN(1); /* JIS X 0201 */ } return 0; @@ -540,30 +537,29 @@ DECODER(shift_jis) ENCODER(shift_jis_2004) { - while (inleft > 0) { - ucs4_t c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code = NOCHAR; int c1, c2; Py_ssize_t insize; JISX0201_ENCODE(c, code) - else DECODE_SURROGATE(c) if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { - WRITE1((unsigned char)code) - NEXT(1, 1) + WRITEBYTE1((unsigned char)code) + NEXT(1, 1); continue; } REQUIRE_OUTBUF(2) - insize = GET_INSIZE(c); + insize = 1; if (code == NOCHAR) { if (c <= 0xffff) { EMULATE_JISX0213_2000_ENCODE_BMP(code, c) else TRYMAP_ENC(jisx0213_bmp, code, c) { if (code == MULTIC) { - if (inleft < 2) { + if (inlen - *inpos < 2) { if (flags & MBENC_FLUSH) { code = find_pairencmap ((ucs2_t)c, 0, @@ -576,8 +572,9 @@ ENCODER(shift_jis_2004) return MBERR_TOOFEW; } else { + Py_UCS4 ch2 = INCHAR2; code = find_pairencmap( - (ucs2_t)c, IN2, + (ucs2_t)c, ch2, jisx0213_pair_encmap, JISX0213_ENCPAIRS); if (code == DBCINV) { @@ -622,10 +619,10 @@ ENCODER(shift_jis_2004) if (c1 & 1) c2 += 0x5e; c1 >>= 1; - OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1)) - OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41)) + OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1)) + OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41)) - NEXT(insize, 2) + NEXT(insize, 2); } return 0; @@ -634,16 +631,15 @@ ENCODER(shift_jis_2004) DECODER(shift_jis_2004) { while (inleft > 0) { - unsigned char c = IN1; + unsigned char c = INBYTE1; - REQUIRE_OUTBUF(1) - JISX0201_DECODE(c, **outbuf) + JISX0201_DECODE(c, writer) else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){ unsigned char c1, c2; - ucs4_t code; + Py_UCS4 code; REQUIRE_INBUF(2) - c2 = IN2; + c2 = INBYTE2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) return 1; @@ -654,50 +650,47 @@ DECODER(shift_jis_2004) if (c1 < 0x5e) { /* Plane 1 */ c1 += 0x21; - EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, + EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c1, c2) - else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { - NEXT_OUT(1) + else TRYMAP_DEC(jisx0208, writer, c1, c2) { } - else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, + else TRYMAP_DEC(jisx0213_1_bmp, writer, c1, c2) { - NEXT_OUT(1) } - else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) { - WRITEUCS4(EMPBASE | code) + else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c1, c2) { + OUTCHAR(EMPBASE | code); } - else TRYMAP_DEC(jisx0213_pair, code, c1, c2) { - WRITE2(code >> 16, code & 0xffff) - NEXT_OUT(2) + else TRYMAP_DEC_CHAR(jisx0213_pair, code, c1, c2) { + OUTCHAR2(code >> 16, code & 0xffff); } else return 1; - NEXT_IN(2) + NEXT_IN(2); } else { /* Plane 2 */ if (c1 >= 0x67) c1 += 0x07; else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37; else c1 -= 0x3d; - EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, + EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2) - else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, - c1, c2) ; - else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) { - WRITEUCS4(EMPBASE | code) - NEXT_IN(2) + else TRYMAP_DEC(jisx0213_2_bmp, writer, + c1, c2) { + } else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c1, c2) { + OUTCHAR(EMPBASE | code); + NEXT_IN(2); continue; } else return 1; - NEXT(2, 1) + NEXT_IN(2); } continue; } else return 1; - NEXT(1, 1) /* JIS X 0201 */ + NEXT_IN(1); /* JIS X 0201 */ } return 0; diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c index f5697dd..0c2309d 100644 --- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -33,16 +33,18 @@ static const unsigned char u2cgk_jongseong[28] = { ENCODER(euc_kr) { - while (inleft > 0) { - Py_UNICODE c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); continue; } - UCS4INVALID(c) + + if (c > 0xFFFF) + return 1; REQUIRE_OUTBUF(2) TRYMAP_ENC(cp949, code, c); @@ -50,9 +52,9 @@ ENCODER(euc_kr) if ((code & 0x8000) == 0) { /* KS X 1001 coded character */ - OUT1((code >> 8) | 0x80) - OUT2((code & 0xFF) | 0x80) - NEXT(1, 2) + OUTBYTE1((code >> 8) | 0x80) + OUTBYTE2((code & 0xFF) | 0x80) + NEXT(1, 2); } else { /* Mapping is found in CP949 extension, * but we encode it in KS X 1001:1998 Annex 3, @@ -61,23 +63,23 @@ ENCODER(euc_kr) REQUIRE_OUTBUF(8) /* syllable composition precedence */ - OUT1(EUCKR_JAMO_FIRSTBYTE) - OUT2(EUCKR_JAMO_FILLER) + OUTBYTE1(EUCKR_JAMO_FIRSTBYTE) + OUTBYTE2(EUCKR_JAMO_FILLER) /* All codepoints in CP949 extension are in unicode * Hangul Syllable area. */ assert(0xac00 <= c && c <= 0xd7a3); c -= 0xac00; - OUT3(EUCKR_JAMO_FIRSTBYTE) - OUT4(u2cgk_choseong[c / 588]) - NEXT_OUT(4) + OUTBYTE3(EUCKR_JAMO_FIRSTBYTE) + OUTBYTE4(u2cgk_choseong[c / 588]) + NEXT_OUT(4); - OUT1(EUCKR_JAMO_FIRSTBYTE) - OUT2(u2cgk_jungseong[(c / 28) % 21]) - OUT3(EUCKR_JAMO_FIRSTBYTE) - OUT4(u2cgk_jongseong[c % 28]) - NEXT(1, 4) + OUTBYTE1(EUCKR_JAMO_FIRSTBYTE) + OUTBYTE2(u2cgk_jungseong[(c / 28) % 21]) + OUTBYTE3(EUCKR_JAMO_FIRSTBYTE) + OUTBYTE4(u2cgk_jongseong[c % 28]) + NEXT(1, 4); } } @@ -102,20 +104,18 @@ static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */ DECODER(euc_kr) { while (inleft > 0) { - unsigned char c = IN1; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1; if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } REQUIRE_INBUF(2) if (c == EUCKR_JAMO_FIRSTBYTE && - IN2 == EUCKR_JAMO_FILLER) { + INBYTE2 == EUCKR_JAMO_FILLER) { /* KS X 1001:1998 Annex 3 make-up sequence */ DBCHAR cho, jung, jong; @@ -145,11 +145,11 @@ DECODER(euc_kr) if (cho == NONE || jung == NONE || jong == NONE) return 1; - OUT1(0xac00 + cho*588 + jung*28 + jong); - NEXT(8, 1) + OUTCHAR(0xac00 + cho*588 + jung*28 + jong); + NEXT_IN(8); } - else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) { - NEXT(2, 1) + else TRYMAP_DEC(ksx1001, writer, c ^ 0x80, INBYTE2 ^ 0x80) { + NEXT_IN(2); } else return 1; @@ -166,27 +166,29 @@ DECODER(euc_kr) ENCODER(cp949) { - while (inleft > 0) { - Py_UNICODE c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); continue; } - UCS4INVALID(c) + + if (c > 0xFFFF) + return 1; REQUIRE_OUTBUF(2) TRYMAP_ENC(cp949, code, c); else return 1; - OUT1((code >> 8) | 0x80) + OUTBYTE1((code >> 8) | 0x80) if (code & 0x8000) - OUT2(code & 0xFF) /* MSB set: CP949 */ + OUTBYTE2(code & 0xFF) /* MSB set: CP949 */ else - OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */ - NEXT(1, 2) + OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */ + NEXT(1, 2); } return 0; @@ -195,22 +197,20 @@ ENCODER(cp949) DECODER(cp949) { while (inleft > 0) { - unsigned char c = IN1; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1; if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } REQUIRE_INBUF(2) - TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80); - else TRYMAP_DEC(cp949ext, **outbuf, c, IN2); + TRYMAP_DEC(ksx1001, writer, c ^ 0x80, INBYTE2 ^ 0x80); + else TRYMAP_DEC(cp949ext, writer, c, INBYTE2); else return 1; - NEXT(2, 1) + NEXT_IN(2); } return 0; @@ -250,16 +250,18 @@ static const DBCHAR u2johabjamo[] = { ENCODER(johab) { - while (inleft > 0) { - Py_UNICODE c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); continue; } - UCS4INVALID(c) + + if (c > 0xFFFF) + return 1; REQUIRE_OUTBUF(2) @@ -285,9 +287,9 @@ ENCODER(johab) t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : (c1 - 0x21 + 0x197)); t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21); - OUT1(t1 >> 1) - OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43) - NEXT(1, 2) + OUTBYTE1(t1 >> 1) + OUTBYTE2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43) + NEXT(1, 2); continue; } else @@ -296,9 +298,9 @@ ENCODER(johab) else return 1; - OUT1(code >> 8) - OUT2(code & 0xff) - NEXT(1, 2) + OUTBYTE1(code >> 8) + OUTBYTE2(code & 0xff) + NEXT(1, 2); } return 0; @@ -348,18 +350,16 @@ static const unsigned char johabjamo_jongseong[32] = { DECODER(johab) { while (inleft > 0) { - unsigned char c = IN1, c2; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1, c2; if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } REQUIRE_INBUF(2) - c2 = IN2; + c2 = INBYTE2; if (c < 0xd8) { /* johab hangul */ @@ -381,33 +381,33 @@ DECODER(johab) if (i_cho == FILL) { if (i_jung == FILL) { if (i_jong == FILL) - OUT1(0x3000) + OUTCHAR(0x3000); else - OUT1(0x3100 | - johabjamo_jongseong[c_jong]) + OUTCHAR(0x3100 | + johabjamo_jongseong[c_jong]); } else { if (i_jong == FILL) - OUT1(0x3100 | - johabjamo_jungseong[c_jung]) + OUTCHAR(0x3100 | + johabjamo_jungseong[c_jung]); else return 1; } } else { if (i_jung == FILL) { if (i_jong == FILL) - OUT1(0x3100 | - johabjamo_choseong[c_cho]) + OUTCHAR(0x3100 | + johabjamo_choseong[c_cho]); else return 1; } else - OUT1(0xac00 + - i_cho * 588 + - i_jung * 28 + - (i_jong == FILL ? 0 : i_jong)) + OUTCHAR(0xac00 + + i_cho * 588 + + i_jung * 28 + + (i_jong == FILL ? 0 : i_jong)); } - NEXT(2, 1) + NEXT_IN(2); } else { /* KS X 1001 except hangul jamos and syllables */ if (c == 0xdf || c > 0xf9 || @@ -424,9 +424,9 @@ DECODER(johab) t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21; t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21; - TRYMAP_DEC(ksx1001, **outbuf, t1, t2); + TRYMAP_DEC(ksx1001, writer, t1, t2); else return 1; - NEXT(2, 1) + NEXT_IN(2); } } } diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c index 916298d..80e0b81 100644 --- a/Modules/cjkcodecs/_codecs_tw.c +++ b/Modules/cjkcodecs/_codecs_tw.c @@ -13,26 +13,28 @@ ENCODER(big5) { - while (inleft > 0) { - Py_UNICODE c = **inbuf; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; if (c < 0x80) { REQUIRE_OUTBUF(1) **outbuf = (unsigned char)c; - NEXT(1, 1) + NEXT(1, 1); continue; } - UCS4INVALID(c) + + if (c > 0xFFFF) + return 1; REQUIRE_OUTBUF(2) TRYMAP_ENC(big5, code, c); else return 1; - OUT1(code >> 8) - OUT2(code & 0xFF) - NEXT(1, 2) + OUTBYTE1(code >> 8) + OUTBYTE2(code & 0xFF) + NEXT(1, 2); } return 0; @@ -41,19 +43,17 @@ ENCODER(big5) DECODER(big5) { while (inleft > 0) { - unsigned char c = IN1; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1; if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } REQUIRE_INBUF(2) - TRYMAP_DEC(big5, **outbuf, c, IN2) { - NEXT(2, 1) + TRYMAP_DEC(big5, writer, c, INBYTE2) { + NEXT_IN(2); } else return 1; } @@ -68,25 +68,27 @@ DECODER(big5) ENCODER(cp950) { - while (inleft > 0) { - Py_UNICODE c = IN1; + while (*inpos < inlen) { + Py_UCS4 c = INCHAR1; DBCHAR code; if (c < 0x80) { - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); continue; } - UCS4INVALID(c) + + if (c > 0xFFFF) + return 1; REQUIRE_OUTBUF(2) TRYMAP_ENC(cp950ext, code, c); else TRYMAP_ENC(big5, code, c); else return 1; - OUT1(code >> 8) - OUT2(code & 0xFF) - NEXT(1, 2) + OUTBYTE1(code >> 8) + OUTBYTE2(code & 0xFF) + NEXT(1, 2); } return 0; @@ -95,23 +97,21 @@ ENCODER(cp950) DECODER(cp950) { while (inleft > 0) { - unsigned char c = IN1; - - REQUIRE_OUTBUF(1) + unsigned char c = INBYTE1; if (c < 0x80) { - OUT1(c) - NEXT(1, 1) + OUTCHAR(c); + NEXT_IN(1); continue; } REQUIRE_INBUF(2) - TRYMAP_DEC(cp950ext, **outbuf, c, IN2); - else TRYMAP_DEC(big5, **outbuf, c, IN2); + TRYMAP_DEC(cp950ext, writer, c, INBYTE2); + else TRYMAP_DEC(big5, writer, c, INBYTE2); else return 1; - NEXT(2, 1) + NEXT_IN(2); } return 0; diff --git a/Modules/cjkcodecs/alg_jisx0201.h b/Modules/cjkcodecs/alg_jisx0201.h index 0bc7db5..98c63e6 100644 --- a/Modules/cjkcodecs/alg_jisx0201.h +++ b/Modules/cjkcodecs/alg_jisx0201.h @@ -10,15 +10,24 @@ JISX0201_R_ENCODE(c, assi) \ else JISX0201_K_ENCODE(c, assi) -#define JISX0201_R_DECODE(c, assi) \ +#define JISX0201_R_DECODE_CHAR(c, assi) \ if ((c) < 0x5c) (assi) = (c); \ else if ((c) == 0x5c) (assi) = 0x00a5; \ else if ((c) < 0x7e) (assi) = (c); \ else if ((c) == 0x7e) (assi) = 0x203e; \ else if ((c) == 0x7f) (assi) = 0x7f; -#define JISX0201_K_DECODE(c, assi) \ +#define JISX0201_R_DECODE(c, writer) \ + if ((c) < 0x5c) OUTCHAR(c); \ + else if ((c) == 0x5c) OUTCHAR(0x00a5); \ + else if ((c) < 0x7e) OUTCHAR(c); \ + else if ((c) == 0x7e) OUTCHAR(0x203e); \ + else if ((c) == 0x7f) OUTCHAR(0x7f); +#define JISX0201_K_DECODE(c, writer) \ if ((c) >= 0xa1 && (c) <= 0xdf) \ - (assi) = 0xfec0 + (c); -#define JISX0201_DECODE(c, assi) \ - JISX0201_R_DECODE(c, assi) \ - else JISX0201_K_DECODE(c, assi) + OUTCHAR(0xfec0 + (c)); +#define JISX0201_K_DECODE_CHAR(c, assi) \ + if ((c) >= 0xa1 && (c) <= 0xdf) \ + (assi) = 0xfec0 + (c); +#define JISX0201_DECODE(c, writer) \ + JISX0201_R_DECODE(c, writer) \ + else JISX0201_K_DECODE(c, writer) diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index ab0682a..18cc02f 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -33,7 +33,7 @@ struct dbcs_index { typedef struct dbcs_index decode_map; struct widedbcs_index { - const ucs4_t *map; + const Py_UCS4 *map; unsigned char bottom, top; }; typedef struct widedbcs_index widedecode_map; @@ -56,7 +56,7 @@ struct dbcs_map { }; struct pair_encodemap { - ucs4_t uniseq; + Py_UCS4 uniseq; DBCHAR code; }; @@ -72,7 +72,8 @@ static const struct dbcs_map *mapping_list; #define ENCODER(encoding) \ static Py_ssize_t encoding##_encode( \ MultibyteCodec_State *state, const void *config, \ - const Py_UNICODE **inbuf, Py_ssize_t inleft, \ + int kind, void *data, \ + Py_ssize_t *inpos, Py_ssize_t inlen, \ unsigned char **outbuf, Py_ssize_t outleft, int flags) #define ENCODER_RESET(encoding) \ static Py_ssize_t encoding##_encode_reset( \ @@ -86,28 +87,30 @@ static const struct dbcs_map *mapping_list; static Py_ssize_t encoding##_decode( \ MultibyteCodec_State *state, const void *config, \ const unsigned char **inbuf, Py_ssize_t inleft, \ - Py_UNICODE **outbuf, Py_ssize_t outleft) + _PyUnicodeWriter *writer) #define DECODER_RESET(encoding) \ static Py_ssize_t encoding##_decode_reset( \ MultibyteCodec_State *state, const void *config) -#if Py_UNICODE_SIZE == 4 -#define UCS4INVALID(code) \ - if ((code) > 0xFFFF) \ - return 1; -#else -#define UCS4INVALID(code) \ - if (0) ; -#endif - #define NEXT_IN(i) \ - (*inbuf) += (i); \ - (inleft) -= (i); + do { \ + (*inbuf) += (i); \ + (inleft) -= (i); \ + } while (0) +#define NEXT_INCHAR(i) \ + do { \ + (*inpos) += (i); \ + } while (0) #define NEXT_OUT(o) \ - (*outbuf) += (o); \ - (outleft) -= (o); + do { \ + (*outbuf) += (o); \ + (outleft) -= (o); \ + } while (0) #define NEXT(i, o) \ - NEXT_IN(i) NEXT_OUT(o) + do { \ + NEXT_INCHAR(i); \ + NEXT_OUT(o); \ + } while (0) #define REQUIRE_INBUF(n) \ if (inleft < (n)) \ @@ -116,48 +119,55 @@ static const struct dbcs_map *mapping_list; if (outleft < (n)) \ return MBERR_TOOSMALL; -#define IN1 ((*inbuf)[0]) -#define IN2 ((*inbuf)[1]) -#define IN3 ((*inbuf)[2]) -#define IN4 ((*inbuf)[3]) - -#define OUT1(c) ((*outbuf)[0]) = (c); -#define OUT2(c) ((*outbuf)[1]) = (c); -#define OUT3(c) ((*outbuf)[2]) = (c); -#define OUT4(c) ((*outbuf)[3]) = (c); - -#define WRITE1(c1) \ +#define INBYTE1 ((*inbuf)[0]) +#define INBYTE2 ((*inbuf)[1]) +#define INBYTE3 ((*inbuf)[2]) +#define INBYTE4 ((*inbuf)[3]) + +#define INCHAR1 PyUnicode_READ(kind, data, *inpos) +#define INCHAR2 PyUnicode_READ(kind, data, *inpos + 1) + +#define OUTCHAR(c) \ + do { \ + if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) \ + return MBERR_TOOSMALL; \ + } while (0) + +#define OUTCHAR2(c1, c2) \ + do { \ + Py_UCS4 _c1 = (c1); \ + Py_UCS4 _c2 = (c2); \ + if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, c2)) < 0) \ + return MBERR_TOOSMALL; \ + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1); \ + PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \ + writer->pos += 2; \ + } while (0) + +#define OUTBYTE1(c) ((*outbuf)[0]) = (c); +#define OUTBYTE2(c) ((*outbuf)[1]) = (c); +#define OUTBYTE3(c) ((*outbuf)[2]) = (c); +#define OUTBYTE4(c) ((*outbuf)[3]) = (c); + +#define WRITEBYTE1(c1) \ REQUIRE_OUTBUF(1) \ (*outbuf)[0] = (c1); -#define WRITE2(c1, c2) \ +#define WRITEBYTE2(c1, c2) \ REQUIRE_OUTBUF(2) \ (*outbuf)[0] = (c1); \ (*outbuf)[1] = (c2); -#define WRITE3(c1, c2, c3) \ +#define WRITEBYTE3(c1, c2, c3) \ REQUIRE_OUTBUF(3) \ (*outbuf)[0] = (c1); \ (*outbuf)[1] = (c2); \ (*outbuf)[2] = (c3); -#define WRITE4(c1, c2, c3, c4) \ +#define WRITEBYTE4(c1, c2, c3, c4) \ REQUIRE_OUTBUF(4) \ (*outbuf)[0] = (c1); \ (*outbuf)[1] = (c2); \ (*outbuf)[2] = (c3); \ (*outbuf)[3] = (c4); -#if Py_UNICODE_SIZE == 2 -# define WRITEUCS4(c) \ - REQUIRE_OUTBUF(2) \ - (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \ - (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \ - NEXT_OUT(2) -#else -# define WRITEUCS4(c) \ - REQUIRE_OUTBUF(1) \ - **outbuf = (Py_UNICODE)(c); \ - NEXT_OUT(1) -#endif - #define _TRYMAP_ENC(m, assi, val) \ ((m)->map != NULL && (val) >= (m)->bottom && \ (val)<= (m)->top && ((assi) = (m)->map[(val) - \ @@ -167,39 +177,41 @@ static const struct dbcs_map *mapping_list; #define TRYMAP_ENC(charset, assi, uni) \ if TRYMAP_ENC_COND(charset, assi, uni) -#define _TRYMAP_DEC(m, assi, val) \ - ((m)->map != NULL && (val) >= (m)->bottom && \ - (val)<= (m)->top && ((assi) = (m)->map[(val) - \ - (m)->bottom]) != UNIINV) -#define TRYMAP_DEC(charset, assi, c1, c2) \ - if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2) +Py_LOCAL_INLINE(int) +_TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c) +{ + if (c == UNIINV || _PyUnicodeWriter_WriteChar(writer, c) < 0) + return UNIINV; + else + return c; +} -#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \ - ((m)->map != NULL && (val) >= (m)->bottom && \ - (val)<= (m)->top && \ - ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \ +#define _TRYMAP_DEC(m, writer, val) \ + ((m)->map != NULL && \ + (val) >= (m)->bottom && \ + (val)<= (m)->top && \ + _TRYMAP_DEC_WRITE(writer, (m)->map[(val) - (m)->bottom]) != UNIINV) +#define _TRYMAP_DEC_CHAR(m, assi, val) \ + ((m)->map != NULL && \ + (val) >= (m)->bottom && \ + (val)<= (m)->top && \ + ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV) +#define TRYMAP_DEC(charset, writer, c1, c2) \ + if _TRYMAP_DEC(&charset##_decmap[c1], writer, c2) +#define TRYMAP_DEC_CHAR(charset, assi, c1, c2) \ + if _TRYMAP_DEC_CHAR(&charset##_decmap[c1], assi, c2) + +#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \ + ((m)->map != NULL && (val) >= (m)->bottom && \ + (val)<= (m)->top && \ + ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \ (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \ (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1)) #define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \ if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \ assplane, asshi, asslo, (uni) & 0xff) -#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \ - if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2) - -#if Py_UNICODE_SIZE == 2 -#define DECODE_SURROGATE(c) \ - if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \ - REQUIRE_INBUF(2) \ - if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \ - c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \ - ((ucs4_t)(IN2) - 0xdc00); \ - } \ - } -#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1) -#else -#define DECODE_SURROGATE(c) {;} -#define GET_INSIZE(c) 1 -#endif +#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \ + if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2) #define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = { #define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL}, @@ -324,7 +336,7 @@ find_pairencmap(ucs2_t body, ucs2_t modifier, const struct pair_encodemap *haystack, int haystacksize) { int pos, min, max; - ucs4_t value = body << 16 | modifier; + Py_UCS4 value = body << 16 | modifier; min = 0; max = haystacksize; diff --git a/Modules/cjkcodecs/emu_jisx0213_2000.h b/Modules/cjkcodecs/emu_jisx0213_2000.h index 4227fb2..877337e 100644 --- a/Modules/cjkcodecs/emu_jisx0213_2000.h +++ b/Modules/cjkcodecs/emu_jisx0213_2000.h @@ -38,6 +38,9 @@ ((c1) == 0x7E && (c2) == 0x7E))) \ return EMULATE_JISX0213_2000_DECODE_INVALID; -#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \ +#define EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2) \ + if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \ + OUTCHAR(0x9B1D); +#define EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(assi, c1, c2) \ if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \ (assi) = 0x9B1D; diff --git a/Modules/cjkcodecs/mappings_cn.h b/Modules/cjkcodecs/mappings_cn.h index a6dcebf..1f8c299 100644 --- a/Modules/cjkcodecs/mappings_cn.h +++ b/Modules/cjkcodecs/mappings_cn.h @@ -4049,7 +4049,7 @@ __gb18030ext_encmap+3126,0,100},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0 static const struct _gb18030_to_unibmp_ranges { - Py_UNICODE first, last; + Py_UCS4 first, last; DBCHAR base; } gb18030_to_unibmp_ranges[] = { {128,163,0},{165,166,36},{169,175,38},{178,182,45},{184,214,50},{216,223,81},{ diff --git a/Modules/cjkcodecs/mappings_jisx0213_pair.h b/Modules/cjkcodecs/mappings_jisx0213_pair.h index eda8e9e..729e4bc 100644 --- a/Modules/cjkcodecs/mappings_jisx0213_pair.h +++ b/Modules/cjkcodecs/mappings_jisx0213_pair.h @@ -3,7 +3,7 @@ static const struct widedbcs_index *jisx0213_pair_decmap; static const struct pair_encodemap *jisx0213_pair_encmap; #else -static const ucs4_t __jisx0213_pair_decmap[49] = { +static const Py_UCS4 __jisx0213_pair_decmap[49] = { 810234010,810365082,810496154,810627226,810758298,816525466,816656538, 816787610,816918682,817049754,817574042,818163866,818426010,838283418, 15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440, diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index c032cdb..b449953 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -10,15 +10,16 @@ #include "multibytecodec.h" typedef struct { - const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end; + PyObject *inobj; + Py_ssize_t inpos, inlen; unsigned char *outbuf, *outbuf_end; PyObject *excobj, *outobj; } MultibyteEncodeBuffer; typedef struct { const unsigned char *inbuf, *inbuf_top, *inbuf_end; - Py_UNICODE *outbuf, *outbuf_end; - PyObject *excobj, *outobj; + PyObject *excobj; + _PyUnicodeWriter writer; } MultibyteDecodeBuffer; PyDoc_STRVAR(MultibyteCodec_Encode__doc__, @@ -45,7 +46,7 @@ static char *incrementalkwarglist[] = {"input", "final", NULL}; static char *streamkwarglist[] = {"stream", "errors", NULL}; static PyObject *multibytecodec_encode(MultibyteCodec *, - MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t, + MultibyteCodec_State *, PyObject *, Py_ssize_t *, PyObject *, int); #define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */ @@ -197,29 +198,6 @@ expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) goto errorexit; \ } -static int -expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) -{ - Py_ssize_t orgpos, orgsize; - - orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); - orgsize = PyUnicode_GET_SIZE(buf->outobj); - if (PyUnicode_Resize(&buf->outobj, orgsize + ( - esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) - return -1; - - buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; - buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) - + PyUnicode_GET_SIZE(buf->outobj); - - return 0; -} -#define REQUIRE_DECODEBUFFER(buf, s) { \ - if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ - if (expand_decodebuffer(buf, s) == -1) \ - goto errorexit; \ -} - /** * MultibyteCodec object @@ -247,7 +225,7 @@ multibytecodec_encerror(MultibyteCodec *codec, return 0; /* retry it */ case MBERR_TOOFEW: reason = "incomplete multibyte sequence"; - esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); + esize = (Py_ssize_t)buf->inpos; break; case MBERR_INTERNAL: PyErr_SetString(PyExc_RuntimeError, @@ -261,14 +239,24 @@ multibytecodec_encerror(MultibyteCodec *codec, } if (errors == ERROR_REPLACE) { - const Py_UNICODE replchar = '?', *inbuf = &replchar; + PyObject *replchar; Py_ssize_t r; + Py_ssize_t inpos; + int kind; + void *data; + replchar = PyUnicode_FromOrdinal('?'); + if (replchar == NULL) + goto errorexit; + kind = PyUnicode_KIND(replchar); + data = PyUnicode_DATA(replchar); + + inpos = 0; for (;;) { - Py_ssize_t outleft; + Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); - outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); - r = codec->encode(state, codec->config, &inbuf, 1, + r = codec->encode(state, codec->config, + kind, data, &inpos, 1, &buf->outbuf, outleft, 0); if (r == MBERR_TOOSMALL) { REQUIRE_ENCODEBUFFER(buf, -1); @@ -278,25 +266,27 @@ multibytecodec_encerror(MultibyteCodec *codec, break; } + Py_DECREF(replchar); + if (r != 0) { REQUIRE_ENCODEBUFFER(buf, 1); *buf->outbuf++ = '?'; } } if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { - buf->inbuf += esize; + buf->inpos += esize; return 0; } - start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); + start = (Py_ssize_t)buf->inpos; end = start + esize; /* use cached exception object if available */ if (buf->excobj == NULL) { - buf->excobj = PyUnicodeEncodeError_Create(codec->encoding, - buf->inbuf_top, - buf->inbuf_end - buf->inbuf_top, - start, end, reason); + buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError, + "sOnns", + codec->encoding, buf->inobj, + start, end, reason); if (buf->excobj == NULL) goto errorexit; } @@ -325,10 +315,10 @@ multibytecodec_encerror(MultibyteCodec *codec, } if (PyUnicode_Check(tobj)) { - const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj); + Py_ssize_t inpos; - retstr = multibytecodec_encode(codec, state, &uraw, - PyUnicode_GET_SIZE(tobj), ERROR_STRICT, + retstr = multibytecodec_encode(codec, state, tobj, + &inpos, ERROR_STRICT, MBENC_FLUSH); if (retstr == NULL) goto errorexit; @@ -347,15 +337,15 @@ multibytecodec_encerror(MultibyteCodec *codec, newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); if (newpos < 0 && !PyErr_Occurred()) - newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); - if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { + newpos += (Py_ssize_t)buf->inlen; + if (newpos < 0 || newpos > buf->inlen) { PyErr_Clear(); PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos); goto errorexit; } - buf->inbuf = buf->inbuf_top + newpos; + buf->inpos = newpos; Py_DECREF(retobj); Py_DECREF(retstr); @@ -374,7 +364,7 @@ multibytecodec_decerror(MultibyteCodec *codec, PyObject *errors, Py_ssize_t e) { PyObject *retobj = NULL, *retuni = NULL; - Py_ssize_t retunisize, newpos; + Py_ssize_t newpos; const char *reason; Py_ssize_t esize, start, end; @@ -385,7 +375,6 @@ multibytecodec_decerror(MultibyteCodec *codec, else { switch (e) { case MBERR_TOOSMALL: - REQUIRE_DECODEBUFFER(buf, -1); return 0; /* retry it */ case MBERR_TOOFEW: reason = "incomplete multibyte sequence"; @@ -403,8 +392,9 @@ multibytecodec_decerror(MultibyteCodec *codec, } if (errors == ERROR_REPLACE) { - REQUIRE_DECODEBUFFER(buf, 1); - *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER; + if (_PyUnicodeWriter_WriteChar(&buf->writer, + Py_UNICODE_REPLACEMENT_CHARACTER) < 0) + goto errorexit; } if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { buf->inbuf += esize; @@ -447,15 +437,8 @@ multibytecodec_decerror(MultibyteCodec *codec, goto errorexit; } - if (PyUnicode_AsUnicode(retuni) == NULL) + if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0) goto errorexit; - retunisize = PyUnicode_GET_SIZE(retuni); - if (retunisize > 0) { - REQUIRE_DECODEBUFFER(buf, retunisize); - memcpy((char *)buf->outbuf, PyUnicode_AS_UNICODE(retuni), - retunisize * Py_UNICODE_SIZE); - buf->outbuf += retunisize; - } newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); if (newpos < 0 && !PyErr_Occurred()) @@ -479,19 +462,29 @@ errorexit: static PyObject * multibytecodec_encode(MultibyteCodec *codec, MultibyteCodec_State *state, - const Py_UNICODE **data, Py_ssize_t datalen, + PyObject *text, Py_ssize_t *inpos_t, PyObject *errors, int flags) { MultibyteEncodeBuffer buf; Py_ssize_t finalsize, r = 0; + Py_ssize_t datalen; + int kind; + void *data; + + if (PyUnicode_READY(text) < 0) + return NULL; + datalen = PyUnicode_GET_LENGTH(text); if (datalen == 0 && !(flags & MBENC_RESET)) return PyBytes_FromStringAndSize(NULL, 0); buf.excobj = NULL; buf.outobj = NULL; - buf.inbuf = buf.inbuf_top = *data; - buf.inbuf_end = buf.inbuf_top + datalen; + buf.inobj = text; /* borrowed reference */ + buf.inpos = 0; + buf.inlen = datalen; + kind = PyUnicode_KIND(buf.inobj); + data = PyUnicode_DATA(buf.inobj); if (datalen > (PY_SSIZE_T_MAX - 16) / 2) { PyErr_NoMemory(); @@ -504,14 +497,14 @@ multibytecodec_encode(MultibyteCodec *codec, buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj); buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj); - while (buf.inbuf < buf.inbuf_end) { - Py_ssize_t inleft, outleft; - + while (buf.inpos < buf.inlen) { /* we don't reuse inleft and outleft here. * error callbacks can relocate the cursor anywhere on buffer*/ - inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); - outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); - r = codec->encode(state, codec->config, &buf.inbuf, inleft, + Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); + + r = codec->encode(state, codec->config, + kind, data, + &buf.inpos, buf.inlen, &buf.outbuf, outleft, flags); if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) break; @@ -542,7 +535,8 @@ multibytecodec_encode(MultibyteCodec *codec, if (_PyBytes_Resize(&buf.outobj, finalsize) == -1) goto errorexit; - *data = buf.inbuf; + if (inpos_t) + *inpos_t = buf.inpos; Py_XDECREF(buf.excobj); return buf.outobj; @@ -557,7 +551,6 @@ MultibyteCodec_Encode(MultibyteCodecObject *self, PyObject *args, PyObject *kwargs) { MultibyteCodec_State state; - Py_UNICODE *data; PyObject *errorcb, *r, *arg, *ucvt; const char *errors = NULL; Py_ssize_t datalen; @@ -580,11 +573,11 @@ MultibyteCodec_Encode(MultibyteCodecObject *self, } } - data = PyUnicode_AsUnicodeAndSize(arg, &datalen); - if (data == NULL) { + if (PyUnicode_READY(arg) < 0) { Py_XDECREF(ucvt); return NULL; } + datalen = PyUnicode_GET_LENGTH(arg); errorcb = internal_error_callback(errors); if (errorcb == NULL) { @@ -596,7 +589,7 @@ MultibyteCodec_Encode(MultibyteCodecObject *self, self->codec->encinit(&state, self->codec->config) != 0) goto errorexit; r = multibytecodec_encode(self->codec, &state, - (const Py_UNICODE **)&data, datalen, errorcb, + arg, NULL, errorcb, MBENC_FLUSH | MBENC_RESET); if (r == NULL) goto errorexit; @@ -617,10 +610,10 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, { MultibyteCodec_State state; MultibyteDecodeBuffer buf; - PyObject *errorcb; + PyObject *errorcb, *res; Py_buffer pdata; const char *data, *errors = NULL; - Py_ssize_t datalen, finalsize; + Py_ssize_t datalen; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode", codeckwarglist, &pdata, &errors)) @@ -640,29 +633,23 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, return make_tuple(PyUnicode_New(0, 0), 0); } + _PyUnicodeWriter_Init(&buf.writer); + buf.writer.min_length = datalen; buf.excobj = NULL; buf.inbuf = buf.inbuf_top = (unsigned char *)data; buf.inbuf_end = buf.inbuf_top + datalen; - buf.outobj = PyUnicode_FromUnicode(NULL, datalen); - if (buf.outobj == NULL) - goto errorexit; - buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); - if (buf.outbuf == NULL) - goto errorexit; - buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); if (self->codec->decinit != NULL && self->codec->decinit(&state, self->codec->config) != 0) goto errorexit; while (buf.inbuf < buf.inbuf_end) { - Py_ssize_t inleft, outleft, r; + Py_ssize_t inleft, r; inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); - outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); r = self->codec->decode(&state, self->codec->config, - &buf.inbuf, inleft, &buf.outbuf, outleft); + &buf.inbuf, inleft, &buf.writer); if (r == 0) break; else if (multibytecodec_decerror(self->codec, &state, @@ -670,23 +657,20 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, goto errorexit; } - finalsize = (Py_ssize_t)(buf.outbuf - - PyUnicode_AS_UNICODE(buf.outobj)); - - if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) - if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) - goto errorexit; + res = _PyUnicodeWriter_Finish(&buf.writer); + if (res == NULL) + goto errorexit; PyBuffer_Release(&pdata); Py_XDECREF(buf.excobj); ERROR_DECREF(errorcb); - return make_tuple(buf.outobj, datalen); + return make_tuple(res, datalen); errorexit: PyBuffer_Release(&pdata); ERROR_DECREF(errorcb); Py_XDECREF(buf.excobj); - Py_XDECREF(buf.outobj); + _PyUnicodeWriter_Dealloc(&buf.writer); return NULL; } @@ -752,9 +736,9 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, PyObject *unistr, int final) { PyObject *ucvt, *r = NULL; - Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; - Py_ssize_t datalen, origpending; - wchar_t *data; + PyObject *inbuf = NULL; + Py_ssize_t inpos, datalen; + PyObject *origpending = NULL; if (PyUnicode_Check(unistr)) ucvt = NULL; @@ -770,69 +754,66 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, } } - data = PyUnicode_AsUnicodeAndSize(unistr, &datalen); - if (data == NULL) - goto errorexit; - origpending = ctx->pendingsize; + if (ctx->pending) { + PyObject *inbuf_tmp; - if (origpending > 0) { - if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) { - PyErr_NoMemory(); - /* inbuf_tmp == NULL */ - goto errorexit; - } - inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); + Py_INCREF(ctx->pending); + origpending = ctx->pending; + + Py_INCREF(ctx->pending); + inbuf_tmp = ctx->pending; + PyUnicode_Append(&inbuf_tmp, unistr); if (inbuf_tmp == NULL) goto errorexit; - memcpy(inbuf_tmp, ctx->pending, - Py_UNICODE_SIZE * ctx->pendingsize); - memcpy(inbuf_tmp + ctx->pendingsize, - PyUnicode_AS_UNICODE(unistr), - Py_UNICODE_SIZE * datalen); - datalen += ctx->pendingsize; - ctx->pendingsize = 0; + Py_CLEAR(ctx->pending); inbuf = inbuf_tmp; } - else - inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr); + else { + origpending = NULL; - inbuf_end = inbuf + datalen; + Py_INCREF(unistr); + inbuf = unistr; + } + if (PyUnicode_READY(inbuf) < 0) + goto errorexit; + inpos = 0; + datalen = PyUnicode_GET_LENGTH(inbuf); r = multibytecodec_encode(ctx->codec, &ctx->state, - (const Py_UNICODE **)&inbuf, datalen, - ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0); + inbuf, &inpos, + ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0); if (r == NULL) { /* recover the original pending buffer */ - if (origpending > 0) - memcpy(ctx->pending, inbuf_tmp, - Py_UNICODE_SIZE * origpending); - ctx->pendingsize = origpending; + Py_CLEAR(ctx->pending); + ctx->pending = origpending; + origpending = NULL; goto errorexit; } + Py_XDECREF(origpending); - if (inbuf < inbuf_end) { - ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); - if (ctx->pendingsize > MAXENCPENDING) { + if (inpos < datalen) { + if (datalen - inpos > MAXENCPENDING) { /* normal codecs can't reach here */ - ctx->pendingsize = 0; PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); goto errorexit; } - memcpy(ctx->pending, inbuf, - ctx->pendingsize * Py_UNICODE_SIZE); + ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen); + if (ctx->pending == NULL) { + /* normal codecs can't reach here */ + goto errorexit; + } } - if (inbuf_tmp != NULL) - PyMem_Del(inbuf_tmp); + Py_DECREF(inbuf); Py_XDECREF(ucvt); return r; errorexit: - if (inbuf_tmp != NULL) - PyMem_Del(inbuf_tmp); Py_XDECREF(r); Py_XDECREF(ucvt); + Py_XDECREF(origpending); + Py_XDECREF(inbuf); return NULL; } @@ -859,17 +840,7 @@ decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, { buf->inbuf = buf->inbuf_top = (const unsigned char *)data; buf->inbuf_end = buf->inbuf_top + size; - if (buf->outobj == NULL) { /* only if outobj is not allocated yet */ - buf->outobj = PyUnicode_FromUnicode(NULL, size); - if (buf->outobj == NULL) - return -1; - buf->outbuf = PyUnicode_AsUnicode(buf->outobj); - if (buf->outbuf == NULL) - return -1; - buf->outbuf_end = buf->outbuf + - PyUnicode_GET_SIZE(buf->outobj); - } - + buf->writer.min_length += size; return 0; } @@ -878,14 +849,13 @@ decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx, MultibyteDecodeBuffer *buf) { while (buf->inbuf < buf->inbuf_end) { - Py_ssize_t inleft, outleft; + Py_ssize_t inleft; Py_ssize_t r; inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); - outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); r = ctx->codec->decode(&ctx->state, ctx->codec->config, - &buf->inbuf, inleft, &buf->outbuf, outleft); + &buf->inbuf, inleft, &buf->writer); if (r == 0 || r == MBERR_TOOFEW) break; else if (multibytecodec_decerror(ctx->codec, &ctx->state, @@ -927,7 +897,7 @@ mbiencoder_reset(MultibyteIncrementalEncoderObject *self) if (r != 0) return NULL; } - self->pendingsize = 0; + Py_CLEAR(self->pending); Py_RETURN_NONE; } @@ -963,7 +933,7 @@ mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } self->codec = ((MultibyteCodecObject *)codec)->codec; - self->pendingsize = 0; + self->pending = NULL; self->errors = internal_error_callback(errors); if (self->errors == NULL) goto errorexit; @@ -1058,8 +1028,9 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self, MultibyteDecodeBuffer buf; char *data, *wdata = NULL; Py_buffer pdata; - Py_ssize_t wsize, finalsize = 0, size, origpending; + Py_ssize_t wsize, size, origpending; int final = 0; + PyObject *res; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode", incrementalkwarglist, &pdata, &final)) @@ -1067,7 +1038,8 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self, data = pdata.buf; size = pdata.len; - buf.outobj = buf.excobj = NULL; + _PyUnicodeWriter_Init(&buf.writer); + buf.excobj = NULL; origpending = self->pendingsize; if (self->pendingsize == 0) { @@ -1109,23 +1081,22 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self, goto errorexit; } - finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); - if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) - if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) - goto errorexit; + res = _PyUnicodeWriter_Finish(&buf.writer); + if (res == NULL) + goto errorexit; PyBuffer_Release(&pdata); if (wdata != data) PyMem_Del(wdata); Py_XDECREF(buf.excobj); - return buf.outobj; + return res; errorexit: PyBuffer_Release(&pdata); if (wdata != NULL && wdata != data) PyMem_Del(wdata); Py_XDECREF(buf.excobj); - Py_XDECREF(buf.outobj); + _PyUnicodeWriter_Dealloc(&buf.writer); return NULL; } @@ -1265,13 +1236,14 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, const char *method, Py_ssize_t sizehint) { MultibyteDecodeBuffer buf; - PyObject *cres; - Py_ssize_t rsize, finalsize = 0; + PyObject *cres, *res; + Py_ssize_t rsize; if (sizehint == 0) return PyUnicode_New(0, 0); - buf.outobj = buf.excobj = NULL; + _PyUnicodeWriter_Init(&buf.writer); + buf.excobj = NULL; cres = NULL; for (;;) { @@ -1303,19 +1275,19 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { PyErr_NoMemory(); goto errorexit; - } - rsize = PyBytes_GET_SIZE(cres) + self->pendingsize; - ctr = PyBytes_FromStringAndSize(NULL, rsize); - if (ctr == NULL) - goto errorexit; - ctrdata = PyBytes_AS_STRING(ctr); - memcpy(ctrdata, self->pending, self->pendingsize); - memcpy(ctrdata + self->pendingsize, - PyBytes_AS_STRING(cres), - PyBytes_GET_SIZE(cres)); - Py_DECREF(cres); - cres = ctr; - self->pendingsize = 0; + } + rsize = PyBytes_GET_SIZE(cres) + self->pendingsize; + ctr = PyBytes_FromStringAndSize(NULL, rsize); + if (ctr == NULL) + goto errorexit; + ctrdata = PyBytes_AS_STRING(ctr); + memcpy(ctrdata, self->pending, self->pendingsize); + memcpy(ctrdata + self->pendingsize, + PyBytes_AS_STRING(cres), + PyBytes_GET_SIZE(cres)); + Py_DECREF(cres); + cres = ctr; + self->pendingsize = 0; } rsize = PyBytes_GET_SIZE(cres); @@ -1340,29 +1312,27 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, goto errorexit; } - finalsize = (Py_ssize_t)(buf.outbuf - - PyUnicode_AS_UNICODE(buf.outobj)); Py_DECREF(cres); cres = NULL; - if (sizehint < 0 || finalsize != 0 || rsize == 0) + if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0) break; sizehint = 1; /* read 1 more byte and retry */ } - if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) - if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) - goto errorexit; + res = _PyUnicodeWriter_Finish(&buf.writer); + if (res == NULL) + goto errorexit; Py_XDECREF(cres); Py_XDECREF(buf.excobj); - return buf.outobj; + return res; errorexit: Py_XDECREF(cres); Py_XDECREF(buf.excobj); - Py_XDECREF(buf.outobj); + _PyUnicodeWriter_Dealloc(&buf.writer); return NULL; } @@ -1649,18 +1619,16 @@ mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines) static PyObject * mbstreamwriter_reset(MultibyteStreamWriterObject *self) { - const Py_UNICODE *pending; PyObject *pwrt; - pending = self->pending; pwrt = multibytecodec_encode(self->codec, &self->state, - &pending, self->pendingsize, self->errors, + self->pending, NULL, self->errors, MBENC_FLUSH | MBENC_RESET); /* some pending buffer can be truncated when UnicodeEncodeError is * raised on 'strict' mode. but, 'reset' method is designed to * reset the pending buffer or states so failed string sequence * ought to be missed */ - self->pendingsize = 0; + Py_CLEAR(self->pending); if (pwrt == NULL) return NULL; @@ -1706,7 +1674,7 @@ mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self->codec = ((MultibyteCodecObject *)codec)->codec; self->stream = stream; Py_INCREF(stream); - self->pendingsize = 0; + self->pending = NULL; self->errors = internal_error_callback(errors); if (self->errors == NULL) goto errorexit; diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h index 1b6ef55..3050aeb 100644 --- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -10,12 +10,6 @@ extern "C" { #endif -#ifdef uint32_t -typedef uint32_t ucs4_t; -#else -typedef unsigned int ucs4_t; -#endif - #ifdef uint16_t typedef uint16_t ucs2_t, DBCHAR; #else @@ -27,13 +21,14 @@ typedef union { int i; unsigned char c[8]; ucs2_t u2[4]; - ucs4_t u4[2]; + Py_UCS4 u4[2]; } MultibyteCodec_State; typedef int (*mbcodec_init)(const void *config); typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state, const void *config, - const Py_UNICODE **inbuf, Py_ssize_t inleft, + int kind, void *data, + Py_ssize_t *inpos, Py_ssize_t inlen, unsigned char **outbuf, Py_ssize_t outleft, int flags); typedef int (*mbencodeinit_func)(MultibyteCodec_State *state, @@ -44,7 +39,7 @@ typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state, typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state, const void *config, const unsigned char **inbuf, Py_ssize_t inleft, - Py_UNICODE **outbuf, Py_ssize_t outleft); + _PyUnicodeWriter *writer); typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state, const void *config); typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state, @@ -81,8 +76,7 @@ typedef struct { #define MAXENCPENDING 2 #define _MultibyteStatefulEncoder_HEAD \ _MultibyteStatefulCodec_HEAD \ - Py_UNICODE pending[MAXENCPENDING]; \ - Py_ssize_t pendingsize; + PyObject *pending; typedef struct { _MultibyteStatefulEncoder_HEAD } MultibyteStatefulEncoderContext; |