diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2013-04-14 00:06:32 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2013-04-14 00:06:32 (GMT) |
commit | d949126995a7ff63590285aa816da65d97a31403 (patch) | |
tree | 5fae129dff24d0a40c415b2a8d1559ff613aceea /Modules/cjkcodecs/_codecs_iso2022.c | |
parent | 71557596b26e9c899e83adc99659732097097c4e (diff) | |
download | cpython-d949126995a7ff63590285aa816da65d97a31403.zip cpython-d949126995a7ff63590285aa816da65d97a31403.tar.gz cpython-d949126995a7ff63590285aa816da65d97a31403.tar.bz2 |
Issue #17693: CJK encoders now use the new Unicode API (PEP 393)
Diffstat (limited to 'Modules/cjkcodecs/_codecs_iso2022.c')
-rw-r--r-- | Modules/cjkcodecs/_codecs_iso2022.c | 113 |
1 file changed, 51 insertions, 62 deletions
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index ae14677..bb63835 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -141,13 +141,13 @@ ENCODER_INIT(iso2022) ENCODER_RESET(iso2022) { if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) - NEXT_OUT(1) + WRITEBYTE1(SI) + NEXT_OUT(1); STATE_CLEARFLAG(F_SHIFTED) } if (STATE_G0 != CHARSET_ASCII) { - WRITE3(ESC, '(', 'B') - NEXT_OUT(3) + WRITEBYTE3(ESC, '(', 'B') + NEXT_OUT(3); STATE_SETG0(CHARSET_ASCII) } return 0; @@ -155,30 +155,29 @@ ENCODER_RESET(iso2022) ENCODER(iso2022) { - while (inleft > 0) { + while (*inpos < inlen) { const struct iso2022_designation *dsg; DBCHAR encoded; - Py_UCS4 c = **inbuf; + Py_UCS4 c = INCHAR1; Py_ssize_t insize; if (c < 0x80) { if (STATE_G0 != CHARSET_ASCII) { - WRITE3(ESC, '(', 'B') + WRITEBYTE3(ESC, '(', 'B') STATE_SETG0(CHARSET_ASCII) - NEXT_OUT(3) + NEXT_OUT(3); } if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) + WRITEBYTE1(SI) STATE_CLEARFLAG(F_SHIFTED) - NEXT_OUT(1) + NEXT_OUT(1); } - WRITE1((unsigned char)c) - NEXT(1, 1) + WRITEBYTE1((unsigned char)c) + NEXT(1, 1); continue; } - DECODE_SURROGATE(c) - insize = GET_INSIZE(c); + insize = 1; encoded = MAP_UNMAPPABLE; for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { @@ -187,24 +186,14 @@ ENCODER(iso2022) if (encoded == MAP_MULTIPLE_AVAIL) { /* this implementation won't work for pair * of non-bmp characters. 
*/ - if (inleft < 2) { + if (inlen - *inpos < 2) { if (!(flags & MBENC_FLUSH)) return MBERR_TOOFEW; length = -1; } else length = 2; -#if Py_UNICODE_SIZE == 2 - if (length == 2) { - Py_UCS4 u4in[2]; - u4in[0] = (Py_UCS4)IN1; - u4in[1] = (Py_UCS4)IN2; - encoded = dsg->encoder(u4in, &length); - } else - encoded = dsg->encoder(&c, &length); -#else encoded = dsg->encoder(&c, &length); -#endif if (encoded != MAP_UNMAPPABLE) { insize = length; break; @@ -221,47 +210,47 @@ ENCODER(iso2022) switch (dsg->plane) { case 0: /* G0 */ if (STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SI) + WRITEBYTE1(SI) STATE_CLEARFLAG(F_SHIFTED) - NEXT_OUT(1) + NEXT_OUT(1); } if (STATE_G0 != dsg->mark) { if (dsg->width == 1) { - WRITE3(ESC, '(', ESCMARK(dsg->mark)) + WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark)) STATE_SETG0(dsg->mark) - NEXT_OUT(3) + NEXT_OUT(3); } else if (dsg->mark == CHARSET_JISX0208) { - WRITE3(ESC, '$', ESCMARK(dsg->mark)) + WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark)) STATE_SETG0(dsg->mark) - NEXT_OUT(3) + NEXT_OUT(3); } else { - WRITE4(ESC, '$', '(', + WRITEBYTE4(ESC, '$', '(', ESCMARK(dsg->mark)) STATE_SETG0(dsg->mark) - NEXT_OUT(4) + NEXT_OUT(4); } } break; case 1: /* G1 */ if (STATE_G1 != dsg->mark) { if (dsg->width == 1) { - WRITE3(ESC, ')', ESCMARK(dsg->mark)) + WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark)) STATE_SETG1(dsg->mark) - NEXT_OUT(3) + NEXT_OUT(3); } else { - WRITE4(ESC, '$', ')', + WRITEBYTE4(ESC, '$', ')', ESCMARK(dsg->mark)) STATE_SETG1(dsg->mark) - NEXT_OUT(4) + NEXT_OUT(4); } } if (!STATE_GETFLAG(F_SHIFTED)) { - WRITE1(SO) + WRITEBYTE1(SO) STATE_SETFLAG(F_SHIFTED) - NEXT_OUT(1) + NEXT_OUT(1); } break; default: /* G2 and G3 is not supported: no encoding in @@ -270,14 +259,14 @@ ENCODER(iso2022) } if (dsg->width == 1) { - WRITE1((unsigned char)encoded) - NEXT_OUT(1) + WRITEBYTE1((unsigned char)encoded) + NEXT_OUT(1); } else { - WRITE2(encoded >> 8, encoded & 0xff) - NEXT_OUT(2) + WRITEBYTE2(encoded >> 8, encoded & 0xff) + NEXT_OUT(2); } - NEXT_IN(insize); + 
NEXT_INCHAR(insize); } return 0; @@ -323,26 +312,26 @@ iso2022processesc(const void *config, MultibyteCodec_State *state, switch (esclen) { case 3: - if (IN2 == '$') { - charset = IN3 | CHARSET_DBCS; + if (INBYTE2 == '$') { + charset = INBYTE3 | CHARSET_DBCS; designation = 0; } else { - charset = IN3; - if (IN2 == '(') designation = 0; - else if (IN2 == ')') designation = 1; - else if (CONFIG_ISSET(USE_G2) && IN2 == '.') + charset = INBYTE3; + if (INBYTE2 == '(') designation = 0; + else if (INBYTE2 == ')') designation = 1; + else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.') designation = 2; else return 3; } break; case 4: - if (IN2 != '$') + if (INBYTE2 != '$') return 4; - charset = IN4 | CHARSET_DBCS; - if (IN3 == '(') designation = 0; - else if (IN3 == ')') designation = 1; + charset = INBYTE4 | CHARSET_DBCS; + if (INBYTE3 == '(') designation = 0; + else if (INBYTE3 == ')') designation = 1; else return 4; break; case 6: /* designation with prefix */ @@ -395,18 +384,18 @@ iso2022processg2(const void *config, MultibyteCodec_State *state, /* not written to use encoder, decoder functions because only few * encodings use G2 designations in CJKCodecs */ if (STATE_G2 == CHARSET_ISO8859_1) { - if (IN3 < 0x80) - OUTCHAR(IN3 + 0x80); + if (INBYTE3 < 0x80) + OUTCHAR(INBYTE3 + 0x80); else return 3; } else if (STATE_G2 == CHARSET_ISO8859_7) { - ISO8859_7_DECODE(IN3 ^ 0x80, writer) + ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer) else return 3; } else if (STATE_G2 == CHARSET_ASCII) { - if (IN3 & 0x80) return 3; - else OUTCHAR(IN3); + if (INBYTE3 & 0x80) return 3; + else OUTCHAR(INBYTE3); } else return MBERR_INTERNAL; @@ -421,7 +410,7 @@ DECODER(iso2022) const struct iso2022_designation *dsgcache = NULL; while (inleft > 0) { - unsigned char c = IN1; + unsigned char c = INBYTE1; Py_ssize_t err; if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { @@ -438,13 +427,13 @@ DECODER(iso2022) switch (c) { case ESC: REQUIRE_INBUF(2) - if (IS_ISO2022ESC(IN2)) { + if (IS_ISO2022ESC(INBYTE2)) { err = 
iso2022processesc(config, state, inbuf, &inleft); if (err != 0) return err; } - else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */ + else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */ REQUIRE_INBUF(3) err = iso2022processg2(config, state, inbuf, &inleft, writer); |