summary refs log tree commit diff stats
path: root/Modules/cjkcodecs/_codecs_iso2022.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/cjkcodecs/_codecs_iso2022.c')
-rw-r--r-- Modules/cjkcodecs/_codecs_iso2022.c 475
1 files changed, 247 insertions, 228 deletions
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index cbc1542..5c401aa 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -73,7 +73,7 @@
#define F_SHIFTED 0x01
#define F_ESCTHROUGHOUT 0x02
-#define STATE_SETG(dn, v) ((state)->c[dn]) = (v);
+#define STATE_SETG(dn, v) do { ((state)->c[dn]) = (v); } while (0)
#define STATE_GETG(dn) ((state)->c[dn])
#define STATE_G0 STATE_GETG(0)
@@ -85,10 +85,10 @@
#define STATE_SETG2(v) STATE_SETG(2, v)
#define STATE_SETG3(v) STATE_SETG(3, v)
-#define STATE_SETFLAG(f) ((state)->c[4]) |= (f);
+#define STATE_SETFLAG(f) do { ((state)->c[4]) |= (f); } while (0)
#define STATE_GETFLAG(f) ((state)->c[4] & (f))
-#define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f);
-#define STATE_CLEARFLAGS() ((state)->c[4]) = 0;
+#define STATE_CLEARFLAG(f) do { ((state)->c[4]) &= ~(f); } while (0)
+#define STATE_CLEARFLAGS() do { ((state)->c[4]) = 0; } while (0)
#define ISO2022_CONFIG ((const struct iso2022_config *)config)
#define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag))
@@ -102,8 +102,8 @@
/*-*- internal data structures -*-*/
typedef int (*iso2022_init_func)(void);
-typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
-typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);
+typedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data);
+typedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length);
struct iso2022_designation {
unsigned char mark;
@@ -132,53 +132,52 @@ CODEC_INIT(iso2022)
ENCODER_INIT(iso2022)
{
- STATE_CLEARFLAGS()
- STATE_SETG0(CHARSET_ASCII)
- STATE_SETG1(CHARSET_ASCII)
+ STATE_CLEARFLAGS();
+ STATE_SETG0(CHARSET_ASCII);
+ STATE_SETG1(CHARSET_ASCII);
return 0;
}
ENCODER_RESET(iso2022)
{
if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- NEXT_OUT(1)
- STATE_CLEARFLAG(F_SHIFTED)
+ WRITEBYTE1(SI);
+ NEXT_OUT(1);
+ STATE_CLEARFLAG(F_SHIFTED);
}
if (STATE_G0 != CHARSET_ASCII) {
- WRITE3(ESC, '(', 'B')
- NEXT_OUT(3)
- STATE_SETG0(CHARSET_ASCII)
+ WRITEBYTE3(ESC, '(', 'B');
+ NEXT_OUT(3);
+ STATE_SETG0(CHARSET_ASCII);
}
return 0;
}
ENCODER(iso2022)
{
- while (inleft > 0) {
+ while (*inpos < inlen) {
const struct iso2022_designation *dsg;
DBCHAR encoded;
- ucs4_t c = **inbuf;
+ Py_UCS4 c = INCHAR1;
Py_ssize_t insize;
if (c < 0x80) {
if (STATE_G0 != CHARSET_ASCII) {
- WRITE3(ESC, '(', 'B')
- STATE_SETG0(CHARSET_ASCII)
- NEXT_OUT(3)
+ WRITEBYTE3(ESC, '(', 'B');
+ STATE_SETG0(CHARSET_ASCII);
+ NEXT_OUT(3);
}
if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- STATE_CLEARFLAG(F_SHIFTED)
- NEXT_OUT(1)
+ WRITEBYTE1(SI);
+ STATE_CLEARFLAG(F_SHIFTED);
+ NEXT_OUT(1);
}
- WRITE1((unsigned char)c)
- NEXT(1, 1)
+ WRITEBYTE1((unsigned char)c);
+ NEXT(1, 1);
continue;
}
- DECODE_SURROGATE(c)
- insize = GET_INSIZE(c);
+ insize = 1;
encoded = MAP_UNMAPPABLE;
for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
@@ -187,24 +186,14 @@ ENCODER(iso2022)
if (encoded == MAP_MULTIPLE_AVAIL) {
/* this implementation won't work for pair
* of non-bmp characters. */
- if (inleft < 2) {
+ if (inlen - *inpos < 2) {
if (!(flags & MBENC_FLUSH))
return MBERR_TOOFEW;
length = -1;
}
else
length = 2;
-#if Py_UNICODE_SIZE == 2
- if (length == 2) {
- ucs4_t u4in[2];
- u4in[0] = (ucs4_t)IN1;
- u4in[1] = (ucs4_t)IN2;
- encoded = dsg->encoder(u4in, &length);
- } else
- encoded = dsg->encoder(&c, &length);
-#else
encoded = dsg->encoder(&c, &length);
-#endif
if (encoded != MAP_UNMAPPABLE) {
insize = length;
break;
@@ -221,47 +210,46 @@ ENCODER(iso2022)
switch (dsg->plane) {
case 0: /* G0 */
if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- STATE_CLEARFLAG(F_SHIFTED)
- NEXT_OUT(1)
+ WRITEBYTE1(SI);
+ STATE_CLEARFLAG(F_SHIFTED);
+ NEXT_OUT(1);
}
if (STATE_G0 != dsg->mark) {
if (dsg->width == 1) {
- WRITE3(ESC, '(', ESCMARK(dsg->mark))
- STATE_SETG0(dsg->mark)
- NEXT_OUT(3)
+ WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark));
+ STATE_SETG0(dsg->mark);
+ NEXT_OUT(3);
}
else if (dsg->mark == CHARSET_JISX0208) {
- WRITE3(ESC, '$', ESCMARK(dsg->mark))
- STATE_SETG0(dsg->mark)
- NEXT_OUT(3)
+ WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark));
+ STATE_SETG0(dsg->mark);
+ NEXT_OUT(3);
}
else {
- WRITE4(ESC, '$', '(',
- ESCMARK(dsg->mark))
- STATE_SETG0(dsg->mark)
- NEXT_OUT(4)
+ WRITEBYTE4(ESC, '$', '(',
+ ESCMARK(dsg->mark));
+ STATE_SETG0(dsg->mark);
+ NEXT_OUT(4);
}
}
break;
case 1: /* G1 */
if (STATE_G1 != dsg->mark) {
if (dsg->width == 1) {
- WRITE3(ESC, ')', ESCMARK(dsg->mark))
- STATE_SETG1(dsg->mark)
- NEXT_OUT(3)
+ WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark));
+ STATE_SETG1(dsg->mark);
+ NEXT_OUT(3);
}
else {
- WRITE4(ESC, '$', ')',
- ESCMARK(dsg->mark))
- STATE_SETG1(dsg->mark)
- NEXT_OUT(4)
+ WRITEBYTE4(ESC, '$', ')', ESCMARK(dsg->mark));
+ STATE_SETG1(dsg->mark);
+ NEXT_OUT(4);
}
}
if (!STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SO)
- STATE_SETFLAG(F_SHIFTED)
- NEXT_OUT(1)
+ WRITEBYTE1(SO);
+ STATE_SETFLAG(F_SHIFTED);
+ NEXT_OUT(1);
}
break;
default: /* G2 and G3 is not supported: no encoding in
@@ -270,14 +258,14 @@ ENCODER(iso2022)
}
if (dsg->width == 1) {
- WRITE1((unsigned char)encoded)
- NEXT_OUT(1)
+ WRITEBYTE1((unsigned char)encoded);
+ NEXT_OUT(1);
}
else {
- WRITE2(encoded >> 8, encoded & 0xff)
- NEXT_OUT(2)
+ WRITEBYTE2(encoded >> 8, encoded & 0xff);
+ NEXT_OUT(2);
}
- NEXT_IN(insize)
+ NEXT_INCHAR(insize);
}
return 0;
@@ -285,17 +273,17 @@ ENCODER(iso2022)
DECODER_INIT(iso2022)
{
- STATE_CLEARFLAGS()
- STATE_SETG0(CHARSET_ASCII)
- STATE_SETG1(CHARSET_ASCII)
- STATE_SETG2(CHARSET_ASCII)
+ STATE_CLEARFLAGS();
+ STATE_SETG0(CHARSET_ASCII);
+ STATE_SETG1(CHARSET_ASCII);
+ STATE_SETG2(CHARSET_ASCII);
return 0;
}
DECODER_RESET(iso2022)
{
- STATE_SETG0(CHARSET_ASCII)
- STATE_CLEARFLAG(F_SHIFTED)
+ STATE_SETG0(CHARSET_ASCII);
+ STATE_CLEARFLAG(F_SHIFTED);
return 0;
}
@@ -314,8 +302,9 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
break;
}
else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&
- (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')
+ (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') {
i += 2;
+ }
}
if (i >= MAX_ESCSEQLEN)
@@ -323,27 +312,33 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
switch (esclen) {
case 3:
- if (IN2 == '$') {
- charset = IN3 | CHARSET_DBCS;
+ if (INBYTE2 == '$') {
+ charset = INBYTE3 | CHARSET_DBCS;
designation = 0;
}
else {
- charset = IN3;
- if (IN2 == '(') designation = 0;
- else if (IN2 == ')') designation = 1;
- else if (CONFIG_ISSET(USE_G2) && IN2 == '.')
+ charset = INBYTE3;
+ if (INBYTE2 == '(')
+ designation = 0;
+ else if (INBYTE2 == ')')
+ designation = 1;
+ else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.')
designation = 2;
- else return 3;
+ else
+ return 3;
}
break;
case 4:
- if (IN2 != '$')
+ if (INBYTE2 != '$')
return 4;
- charset = IN4 | CHARSET_DBCS;
- if (IN3 == '(') designation = 0;
- else if (IN3 == ')') designation = 1;
- else return 4;
+ charset = INBYTE4 | CHARSET_DBCS;
+ if (INBYTE3 == '(')
+ designation = 0;
+ else if (INBYTE3 == ')')
+ designation = 1;
+ else
+ return 4;
break;
case 6: /* designation with prefix */
if (CONFIG_ISSET(USE_JISX0208_EXT) &&
@@ -363,58 +358,65 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
if (charset != CHARSET_ASCII) {
const struct iso2022_designation *dsg;
- for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++)
+ for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
if (dsg->mark == charset)
break;
+ }
if (!dsg->mark)
return esclen;
}
- STATE_SETG(designation, charset)
+ STATE_SETG(designation, charset);
*inleft -= esclen;
(*inbuf) += esclen;
return 0;
}
-#define ISO8859_7_DECODE(c, assi) \
- if ((c) < 0xa0) (assi) = (c); \
- else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
- (assi) = (c); \
- else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
- (0xbffffd77L & (1L << ((c)-0xb4))))) \
- (assi) = 0x02d0 + (c); \
- else if ((c) == 0xa1) (assi) = 0x2018; \
- else if ((c) == 0xa2) (assi) = 0x2019; \
- else if ((c) == 0xaf) (assi) = 0x2015;
+#define ISO8859_7_DECODE(c, writer) \
+ if ((c) < 0xa0) { \
+ OUTCHAR(c); \
+ } else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) { \
+ OUTCHAR(c); \
+ } else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
+ (0xbffffd77L & (1L << ((c)-0xb4))))) { \
+ OUTCHAR(0x02d0 + (c)); \
+ } else if ((c) == 0xa1) { \
+ OUTCHAR(0x2018); \
+ } else if ((c) == 0xa2) { \
+ OUTCHAR(0x2019); \
+ } else if ((c) == 0xaf) { \
+ OUTCHAR(0x2015); \
+ }
static Py_ssize_t
iso2022processg2(const void *config, MultibyteCodec_State *state,
const unsigned char **inbuf, Py_ssize_t *inleft,
- Py_UNICODE **outbuf, Py_ssize_t *outleft)
+ _PyUnicodeWriter *writer)
{
/* not written to use encoder, decoder functions because only few
* encodings use G2 designations in CJKCodecs */
if (STATE_G2 == CHARSET_ISO8859_1) {
- if (IN3 < 0x80)
- OUT1(IN3 + 0x80)
+ if (INBYTE3 < 0x80)
+ OUTCHAR(INBYTE3 + 0x80);
else
return 3;
}
else if (STATE_G2 == CHARSET_ISO8859_7) {
- ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
- else return 3;
+ ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer)
+ else
+ return 3;
}
else if (STATE_G2 == CHARSET_ASCII) {
- if (IN3 & 0x80) return 3;
- else **outbuf = IN3;
+ if (INBYTE3 & 0x80)
+ return 3;
+ else
+ OUTCHAR(INBYTE3);
}
else
return MBERR_INTERNAL;
(*inbuf) += 3;
*inleft -= 3;
- (*outbuf) += 1;
- *outleft -= 1;
return 0;
}
@@ -423,58 +425,58 @@ DECODER(iso2022)
const struct iso2022_designation *dsgcache = NULL;
while (inleft > 0) {
- unsigned char c = IN1;
+ unsigned char c = INBYTE1;
Py_ssize_t err;
if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
/* ESC throughout mode:
* for non-iso2022 escape sequences */
- WRITE1(c) /* assume as ISO-8859-1 */
- NEXT(1, 1)
+ OUTCHAR(c); /* assume as ISO-8859-1 */
+ NEXT_IN(1);
if (IS_ESCEND(c)) {
- STATE_CLEARFLAG(F_ESCTHROUGHOUT)
+ STATE_CLEARFLAG(F_ESCTHROUGHOUT);
}
continue;
}
switch (c) {
case ESC:
- REQUIRE_INBUF(2)
- if (IS_ISO2022ESC(IN2)) {
+ REQUIRE_INBUF(2);
+ if (IS_ISO2022ESC(INBYTE2)) {
err = iso2022processesc(config, state,
inbuf, &inleft);
if (err != 0)
return err;
}
- else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
- REQUIRE_INBUF(3)
+ else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */
+ REQUIRE_INBUF(3);
err = iso2022processg2(config, state,
- inbuf, &inleft, outbuf, &outleft);
+ inbuf, &inleft, writer);
if (err != 0)
return err;
}
else {
- WRITE1(ESC)
- STATE_SETFLAG(F_ESCTHROUGHOUT)
- NEXT(1, 1)
+ OUTCHAR(ESC);
+ STATE_SETFLAG(F_ESCTHROUGHOUT);
+ NEXT_IN(1);
}
break;
case SI:
if (CONFIG_ISSET(NO_SHIFT))
goto bypass;
- STATE_CLEARFLAG(F_SHIFTED)
- NEXT_IN(1)
+ STATE_CLEARFLAG(F_SHIFTED);
+ NEXT_IN(1);
break;
case SO:
if (CONFIG_ISSET(NO_SHIFT))
goto bypass;
- STATE_SETFLAG(F_SHIFTED)
- NEXT_IN(1)
+ STATE_SETFLAG(F_SHIFTED);
+ NEXT_IN(1);
break;
case LF:
- STATE_CLEARFLAG(F_SHIFTED)
- WRITE1(LF)
- NEXT(1, 1)
+ STATE_CLEARFLAG(F_SHIFTED);
+ OUTCHAR(LF);
+ NEXT_IN(1);
break;
default:
if (c < 0x20) /* C0 */
@@ -484,7 +486,7 @@ DECODER(iso2022)
else {
const struct iso2022_designation *dsg;
unsigned char charset;
- ucs4_t decoded;
+ Py_UCS4 decoded;
if (STATE_GETFLAG(F_SHIFTED))
charset = STATE_G1;
@@ -492,43 +494,44 @@ DECODER(iso2022)
charset = STATE_G0;
if (charset == CHARSET_ASCII) {
-bypass: WRITE1(c)
- NEXT(1, 1)
- break;
- }
-
- if (dsgcache != NULL &&
- dsgcache->mark == charset)
- dsg = dsgcache;
- else {
- for (dsg = CONFIG_DESIGNATIONS;
- dsg->mark != charset
+bypass:
+ OUTCHAR(c);
+ NEXT_IN(1);
+ break;
+ }
+
+ if (dsgcache != NULL &&
+ dsgcache->mark == charset)
+ dsg = dsgcache;
+ else {
+ for (dsg = CONFIG_DESIGNATIONS;
+ dsg->mark != charset
#ifdef Py_DEBUG
- && dsg->mark != '\0'
+ && dsg->mark != '\0'
#endif
- ;dsg++)
- /* noop */;
- assert(dsg->mark != '\0');
- dsgcache = dsg;
- }
-
- REQUIRE_INBUF(dsg->width)
- decoded = dsg->decoder(*inbuf);
- if (decoded == MAP_UNMAPPABLE)
- return dsg->width;
-
- if (decoded < 0x10000) {
- WRITE1(decoded)
- NEXT_OUT(1)
- }
- else if (decoded < 0x30000) {
- WRITEUCS4(decoded)
- }
- else { /* JIS X 0213 pairs */
- WRITE2(decoded >> 16, decoded & 0xffff)
- NEXT_OUT(2)
+ ; dsg++)
+ {
+ /* noop */
+ }
+ assert(dsg->mark != '\0');
+ dsgcache = dsg;
}
- NEXT_IN(dsg->width)
+
+ REQUIRE_INBUF(dsg->width);
+ decoded = dsg->decoder(*inbuf);
+ if (decoded == MAP_UNMAPPABLE)
+ return dsg->width;
+
+ if (decoded < 0x10000) {
+ OUTCHAR(decoded);
+ }
+ else if (decoded < 0x30000) {
+ OUTCHAR(decoded);
+ }
+ else { /* JIS X 0213 pairs */
+ OUTCHAR2(decoded >> 16, decoded & 0xffff);
+ }
+ NEXT_IN(dsg->width);
}
break;
}
@@ -577,25 +580,26 @@ ksx1001_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
ksx1001_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(ksx1001, u, data[0], data[1])
+ Py_UCS4 u;
+ if (TRYMAP_DEC(ksx1001, u, data[0], data[1]))
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
+ksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
if (*data < 0x10000) {
- TRYMAP_ENC(cp949, coded, *data)
+ if (TRYMAP_ENC(cp949, coded, *data)) {
if (!(coded & 0x8000))
return coded;
+ }
}
return MAP_UNMAPPABLE;
}
@@ -613,27 +617,27 @@ jisx0208_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
jisx0208_decoder(const unsigned char *data)
{
- ucs4_t u;
+ Py_UCS4 u;
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1])
+ else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]))
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
if (*data < 0x10000) {
if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
return 0x2140;
- else TRYMAP_ENC(jisxcommon, coded, *data) {
+ else if (TRYMAP_ENC(jisxcommon, coded, *data)) {
if (!(coded & 0x8000))
return coded;
}
@@ -654,23 +658,23 @@ jisx0212_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
jisx0212_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(jisx0212, u, data[0], data[1])
+ Py_UCS4 u;
+ if (TRYMAP_DEC(jisx0212, u, data[0], data[1]))
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
if (*data < 0x10000) {
- TRYMAP_ENC(jisxcommon, coded, *data) {
+ if (TRYMAP_ENC(jisxcommon, coded, *data)) {
if (coded & 0x8000)
return coded & 0x7fff;
}
@@ -705,30 +709,34 @@ jisx0213_init(void)
}
#define config ((void *)2000)
-static ucs4_t
+static Py_UCS4
jisx0213_2000_1_decoder(const unsigned char *data)
{
- ucs4_t u;
+ Py_UCS4 u;
EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+ else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]))
+ ;
+ else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]))
+ ;
+ else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]))
u |= 0x20000;
- else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+ else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]))
+ ;
else
return MAP_UNMAPPABLE;
return u;
}
-static ucs4_t
+static Py_UCS4
jisx0213_2000_2_decoder(const unsigned char *data)
{
- ucs4_t u;
- EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+ Py_UCS4 u;
+ EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1])
+ if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]))
+ ;
+ else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]))
u |= 0x20000;
else
return MAP_UNMAPPABLE;
@@ -736,28 +744,32 @@ jisx0213_2000_2_decoder(const unsigned char *data)
}
#undef config
-static ucs4_t
+static Py_UCS4
jisx0213_2004_1_decoder(const unsigned char *data)
{
- ucs4_t u;
+ Py_UCS4 u;
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+ else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]))
+ ;
+ else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]))
+ ;
+ else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]))
u |= 0x20000;
- else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+ else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]))
+ ;
else
return MAP_UNMAPPABLE;
return u;
}
-static ucs4_t
+static Py_UCS4
jisx0213_2004_2_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+ Py_UCS4 u;
+ if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]))
+ ;
+ else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]))
u |= 0x20000;
else
return MAP_UNMAPPABLE;
@@ -765,7 +777,7 @@ jisx0213_2004_2_decoder(const unsigned char *data)
}
static DBCHAR
-jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
+jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config)
{
DBCHAR coded;
@@ -774,28 +786,28 @@ jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
if (*data >= 0x10000) {
if ((*data) >> 16 == 0x20000 >> 16) {
EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data)
- else TRYMAP_ENC(jisx0213_emp, coded,
- (*data) & 0xffff)
+ else if (TRYMAP_ENC(jisx0213_emp, coded, (*data) & 0xffff))
return coded;
}
return MAP_UNMAPPABLE;
}
EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data)
- else TRYMAP_ENC(jisx0213_bmp, coded, *data) {
+ else if (TRYMAP_ENC(jisx0213_bmp, coded, *data)) {
if (coded == MULTIC)
return MAP_MULTIPLE_AVAIL;
}
- else TRYMAP_ENC(jisxcommon, coded, *data) {
+ else if (TRYMAP_ENC(jisxcommon, coded, *data)) {
if (coded & 0x8000)
return MAP_UNMAPPABLE;
}
else
return MAP_UNMAPPABLE;
return coded;
+
case 2: /* second character of unicode pair */
coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
- jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+ jisx0213_pair_encmap, JISX0213_ENCPAIRS);
if (coded == DBCINV) {
*length = 1;
coded = find_pairencmap((ucs2_t)data[0], 0,
@@ -805,21 +817,24 @@ jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
}
else
return coded;
+
case -1: /* flush unterminated */
*length = 1;
coded = find_pairencmap((ucs2_t)data[0], 0,
- jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+ jisx0213_pair_encmap, JISX0213_ENCPAIRS);
if (coded == DBCINV)
return MAP_UNMAPPABLE;
else
return coded;
+ break;
+
default:
return MAP_UNMAPPABLE;
}
}
static DBCHAR
-jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -831,7 +846,7 @@ jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
Py_ssize_t ilength = *length;
@@ -854,7 +869,7 @@ jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -866,7 +881,7 @@ jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, NULL);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -878,7 +893,7 @@ jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
Py_ssize_t ilength = *length;
@@ -901,7 +916,7 @@ jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
}
static DBCHAR
-jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded = jisx0213_encoder(data, length, NULL);
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -912,39 +927,43 @@ jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
return MAP_UNMAPPABLE;
}
-static ucs4_t
+static Py_UCS4
jisx0201_r_decoder(const unsigned char *data)
{
- ucs4_t u;
- JISX0201_R_DECODE(*data, u)
- else return MAP_UNMAPPABLE;
+ Py_UCS4 u;
+ JISX0201_R_DECODE_CHAR(*data, u)
+ else
+ return MAP_UNMAPPABLE;
return u;
}
static DBCHAR
-jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
JISX0201_R_ENCODE(*data, coded)
- else return MAP_UNMAPPABLE;
+ else
+ return MAP_UNMAPPABLE;
return coded;
}
-static ucs4_t
+static Py_UCS4
jisx0201_k_decoder(const unsigned char *data)
{
- ucs4_t u;
- JISX0201_K_DECODE(*data ^ 0x80, u)
- else return MAP_UNMAPPABLE;
+ Py_UCS4 u;
+ JISX0201_K_DECODE_CHAR(*data ^ 0x80, u)
+ else
+ return MAP_UNMAPPABLE;
return u;
}
static DBCHAR
-jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
JISX0201_K_ENCODE(*data, coded)
- else return MAP_UNMAPPABLE;
+ else
+ return MAP_UNMAPPABLE;
return coded - 0x80;
}
@@ -961,23 +980,23 @@ gb2312_init(void)
return 0;
}
-static ucs4_t
+static Py_UCS4
gb2312_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(gb2312, u, data[0], data[1])
+ Py_UCS4 u;
+ if (TRYMAP_DEC(gb2312, u, data[0], data[1]))
return u;
else
return MAP_UNMAPPABLE;
}
static DBCHAR
-gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
+gb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
if (*data < 0x10000) {
- TRYMAP_ENC(gbcommon, coded, *data) {
+ if (TRYMAP_ENC(gbcommon, coded, *data)) {
if (!(coded & 0x8000))
return coded;
}
@@ -986,14 +1005,14 @@ gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
}
-static ucs4_t
+static Py_UCS4
dummy_decoder(const unsigned char *data)
{
return MAP_UNMAPPABLE;
}
static DBCHAR
-dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
+dummy_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
return MAP_UNMAPPABLE;
}