summary | refs | log | tree | commit | diff | stats
path: root/Modules/cjkcodecs/_codecs_iso2022.c
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2013-04-14 00:06:32 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2013-04-14 00:06:32 (GMT)
commit: d949126995a7ff63590285aa816da65d97a31403 (patch)
tree: 5fae129dff24d0a40c415b2a8d1559ff613aceea /Modules/cjkcodecs/_codecs_iso2022.c
parent: 71557596b26e9c899e83adc99659732097097c4e (diff)
download: cpython-d949126995a7ff63590285aa816da65d97a31403.zip
cpython-d949126995a7ff63590285aa816da65d97a31403.tar.gz
cpython-d949126995a7ff63590285aa816da65d97a31403.tar.bz2
Issue #17693: CJK encoders now use the new Unicode API (PEP 393)
Diffstat (limited to 'Modules/cjkcodecs/_codecs_iso2022.c')
-rw-r--r-- Modules/cjkcodecs/_codecs_iso2022.c 113
1 file changed, 51 insertions, 62 deletions
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index ae14677..bb63835 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -141,13 +141,13 @@ ENCODER_INIT(iso2022)
ENCODER_RESET(iso2022)
{
if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- NEXT_OUT(1)
+ WRITEBYTE1(SI)
+ NEXT_OUT(1);
STATE_CLEARFLAG(F_SHIFTED)
}
if (STATE_G0 != CHARSET_ASCII) {
- WRITE3(ESC, '(', 'B')
- NEXT_OUT(3)
+ WRITEBYTE3(ESC, '(', 'B')
+ NEXT_OUT(3);
STATE_SETG0(CHARSET_ASCII)
}
return 0;
@@ -155,30 +155,29 @@ ENCODER_RESET(iso2022)
ENCODER(iso2022)
{
- while (inleft > 0) {
+ while (*inpos < inlen) {
const struct iso2022_designation *dsg;
DBCHAR encoded;
- Py_UCS4 c = **inbuf;
+ Py_UCS4 c = INCHAR1;
Py_ssize_t insize;
if (c < 0x80) {
if (STATE_G0 != CHARSET_ASCII) {
- WRITE3(ESC, '(', 'B')
+ WRITEBYTE3(ESC, '(', 'B')
STATE_SETG0(CHARSET_ASCII)
- NEXT_OUT(3)
+ NEXT_OUT(3);
}
if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
+ WRITEBYTE1(SI)
STATE_CLEARFLAG(F_SHIFTED)
- NEXT_OUT(1)
+ NEXT_OUT(1);
}
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c)
+ NEXT(1, 1);
continue;
}
- DECODE_SURROGATE(c)
- insize = GET_INSIZE(c);
+ insize = 1;
encoded = MAP_UNMAPPABLE;
for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
@@ -187,24 +186,14 @@ ENCODER(iso2022)
if (encoded == MAP_MULTIPLE_AVAIL) {
/* this implementation won't work for pair
* of non-bmp characters. */
- if (inleft < 2) {
+ if (inlen - *inpos < 2) {
if (!(flags & MBENC_FLUSH))
return MBERR_TOOFEW;
length = -1;
}
else
length = 2;
-#if Py_UNICODE_SIZE == 2
- if (length == 2) {
- Py_UCS4 u4in[2];
- u4in[0] = (Py_UCS4)IN1;
- u4in[1] = (Py_UCS4)IN2;
- encoded = dsg->encoder(u4in, &length);
- } else
- encoded = dsg->encoder(&c, &length);
-#else
encoded = dsg->encoder(&c, &length);
-#endif
if (encoded != MAP_UNMAPPABLE) {
insize = length;
break;
@@ -221,47 +210,47 @@ ENCODER(iso2022)
switch (dsg->plane) {
case 0: /* G0 */
if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
+ WRITEBYTE1(SI)
STATE_CLEARFLAG(F_SHIFTED)
- NEXT_OUT(1)
+ NEXT_OUT(1);
}
if (STATE_G0 != dsg->mark) {
if (dsg->width == 1) {
- WRITE3(ESC, '(', ESCMARK(dsg->mark))
+ WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark))
STATE_SETG0(dsg->mark)
- NEXT_OUT(3)
+ NEXT_OUT(3);
}
else if (dsg->mark == CHARSET_JISX0208) {
- WRITE3(ESC, '$', ESCMARK(dsg->mark))
+ WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark))
STATE_SETG0(dsg->mark)
- NEXT_OUT(3)
+ NEXT_OUT(3);
}
else {
- WRITE4(ESC, '$', '(',
+ WRITEBYTE4(ESC, '$', '(',
ESCMARK(dsg->mark))
STATE_SETG0(dsg->mark)
- NEXT_OUT(4)
+ NEXT_OUT(4);
}
}
break;
case 1: /* G1 */
if (STATE_G1 != dsg->mark) {
if (dsg->width == 1) {
- WRITE3(ESC, ')', ESCMARK(dsg->mark))
+ WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark))
STATE_SETG1(dsg->mark)
- NEXT_OUT(3)
+ NEXT_OUT(3);
}
else {
- WRITE4(ESC, '$', ')',
+ WRITEBYTE4(ESC, '$', ')',
ESCMARK(dsg->mark))
STATE_SETG1(dsg->mark)
- NEXT_OUT(4)
+ NEXT_OUT(4);
}
}
if (!STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SO)
+ WRITEBYTE1(SO)
STATE_SETFLAG(F_SHIFTED)
- NEXT_OUT(1)
+ NEXT_OUT(1);
}
break;
default: /* G2 and G3 is not supported: no encoding in
@@ -270,14 +259,14 @@ ENCODER(iso2022)
}
if (dsg->width == 1) {
- WRITE1((unsigned char)encoded)
- NEXT_OUT(1)
+ WRITEBYTE1((unsigned char)encoded)
+ NEXT_OUT(1);
}
else {
- WRITE2(encoded >> 8, encoded & 0xff)
- NEXT_OUT(2)
+ WRITEBYTE2(encoded >> 8, encoded & 0xff)
+ NEXT_OUT(2);
}
- NEXT_IN(insize);
+ NEXT_INCHAR(insize);
}
return 0;
@@ -323,26 +312,26 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
switch (esclen) {
case 3:
- if (IN2 == '$') {
- charset = IN3 | CHARSET_DBCS;
+ if (INBYTE2 == '$') {
+ charset = INBYTE3 | CHARSET_DBCS;
designation = 0;
}
else {
- charset = IN3;
- if (IN2 == '(') designation = 0;
- else if (IN2 == ')') designation = 1;
- else if (CONFIG_ISSET(USE_G2) && IN2 == '.')
+ charset = INBYTE3;
+ if (INBYTE2 == '(') designation = 0;
+ else if (INBYTE2 == ')') designation = 1;
+ else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.')
designation = 2;
else return 3;
}
break;
case 4:
- if (IN2 != '$')
+ if (INBYTE2 != '$')
return 4;
- charset = IN4 | CHARSET_DBCS;
- if (IN3 == '(') designation = 0;
- else if (IN3 == ')') designation = 1;
+ charset = INBYTE4 | CHARSET_DBCS;
+ if (INBYTE3 == '(') designation = 0;
+ else if (INBYTE3 == ')') designation = 1;
else return 4;
break;
case 6: /* designation with prefix */
@@ -395,18 +384,18 @@ iso2022processg2(const void *config, MultibyteCodec_State *state,
/* not written to use encoder, decoder functions because only few
* encodings use G2 designations in CJKCodecs */
if (STATE_G2 == CHARSET_ISO8859_1) {
- if (IN3 < 0x80)
- OUTCHAR(IN3 + 0x80);
+ if (INBYTE3 < 0x80)
+ OUTCHAR(INBYTE3 + 0x80);
else
return 3;
}
else if (STATE_G2 == CHARSET_ISO8859_7) {
- ISO8859_7_DECODE(IN3 ^ 0x80, writer)
+ ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer)
else return 3;
}
else if (STATE_G2 == CHARSET_ASCII) {
- if (IN3 & 0x80) return 3;
- else OUTCHAR(IN3);
+ if (INBYTE3 & 0x80) return 3;
+ else OUTCHAR(INBYTE3);
}
else
return MBERR_INTERNAL;
@@ -421,7 +410,7 @@ DECODER(iso2022)
const struct iso2022_designation *dsgcache = NULL;
while (inleft > 0) {
- unsigned char c = IN1;
+ unsigned char c = INBYTE1;
Py_ssize_t err;
if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
@@ -438,13 +427,13 @@ DECODER(iso2022)
switch (c) {
case ESC:
REQUIRE_INBUF(2)
- if (IS_ISO2022ESC(IN2)) {
+ if (IS_ISO2022ESC(INBYTE2)) {
err = iso2022processesc(config, state,
inbuf, &inleft);
if (err != 0)
return err;
}
- else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
+ else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */
REQUIRE_INBUF(3)
err = iso2022processg2(config, state,
inbuf, &inleft, writer);