summary | refs | log | tree | commit | diff | stats
path: root/Modules/cjkcodecs/_codecs_iso2022.c
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2010-05-09 15:52:27 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2010-05-09 15:52:27 (GMT)
commit: f95a1b3c53bdd678b64aa608d4375660033460c3 (patch)
tree: a8bee40b1b14e28ff5978ea519f3035a3c399912 /Modules/cjkcodecs/_codecs_iso2022.c
parent: bd250300191133d276a71b395b6428081bf825b8 (diff)
download: cpython-f95a1b3c53bdd678b64aa608d4375660033460c3.zip
          cpython-f95a1b3c53bdd678b64aa608d4375660033460c3.tar.gz
          cpython-f95a1b3c53bdd678b64aa608d4375660033460c3.tar.bz2
Recorded merge of revisions 81029 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r81029 | antoine.pitrou | 2010-05-09 16:46:46 +0200 (dim., 09 mai 2010) | 3 lines

  Untabify C files. Will watch buildbots.
........
Diffstat (limited to 'Modules/cjkcodecs/_codecs_iso2022.c')
-rw-r--r-- Modules/cjkcodecs/_codecs_iso2022.c 1622
1 file changed, 811 insertions, 811 deletions
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 9ce7b75..25c1a36 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -19,85 +19,85 @@
state->c[0-3]
- 00000000
- ||^^^^^|
- |+-----+---- G0-3 Character Set
- +----------- Is G0-3 double byte?
+ 00000000
+ ||^^^^^|
+ |+-----+---- G0-3 Character Set
+ +----------- Is G0-3 double byte?
state->c[4]
- 00000000
- ||
- |+---- Locked-Shift?
- +----- ESC Throughout
+ 00000000
+ ||
+ |+---- Locked-Shift?
+ +----- ESC Throughout
*/
-#define ESC 0x1B
-#define SO 0x0E
-#define SI 0x0F
-#define LF 0x0A
-
-#define MAX_ESCSEQLEN 16
-
-#define CHARSET_ISO8859_1 'A'
-#define CHARSET_ASCII 'B'
-#define CHARSET_ISO8859_7 'F'
-#define CHARSET_JISX0201_K 'I'
-#define CHARSET_JISX0201_R 'J'
-
-#define CHARSET_GB2312 ('A'|CHARSET_DBCS)
-#define CHARSET_JISX0208 ('B'|CHARSET_DBCS)
-#define CHARSET_KSX1001 ('C'|CHARSET_DBCS)
-#define CHARSET_JISX0212 ('D'|CHARSET_DBCS)
-#define CHARSET_GB2312_8565 ('E'|CHARSET_DBCS)
-#define CHARSET_CNS11643_1 ('G'|CHARSET_DBCS)
-#define CHARSET_CNS11643_2 ('H'|CHARSET_DBCS)
-#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
-#define CHARSET_JISX0213_2 ('P'|CHARSET_DBCS)
-#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
-#define CHARSET_JISX0208_O ('@'|CHARSET_DBCS)
-
-#define CHARSET_DBCS 0x80
-#define ESCMARK(mark) ((mark) & 0x7f)
-
-#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
+#define ESC 0x1B
+#define SO 0x0E
+#define SI 0x0F
+#define LF 0x0A
+
+#define MAX_ESCSEQLEN 16
+
+#define CHARSET_ISO8859_1 'A'
+#define CHARSET_ASCII 'B'
+#define CHARSET_ISO8859_7 'F'
+#define CHARSET_JISX0201_K 'I'
+#define CHARSET_JISX0201_R 'J'
+
+#define CHARSET_GB2312 ('A'|CHARSET_DBCS)
+#define CHARSET_JISX0208 ('B'|CHARSET_DBCS)
+#define CHARSET_KSX1001 ('C'|CHARSET_DBCS)
+#define CHARSET_JISX0212 ('D'|CHARSET_DBCS)
+#define CHARSET_GB2312_8565 ('E'|CHARSET_DBCS)
+#define CHARSET_CNS11643_1 ('G'|CHARSET_DBCS)
+#define CHARSET_CNS11643_2 ('H'|CHARSET_DBCS)
+#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
+#define CHARSET_JISX0213_2 ('P'|CHARSET_DBCS)
+#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
+#define CHARSET_JISX0208_O ('@'|CHARSET_DBCS)
+
+#define CHARSET_DBCS 0x80
+#define ESCMARK(mark) ((mark) & 0x7f)
+
+#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
#define IS_ISO2022ESC(c2) \
- ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
- (c2) == '.' || (c2) == '&')
- /* this is not a complete list of ISO-2022 escape sequence headers.
- * but, it's enough to implement CJK instances of iso-2022. */
-
-#define MAP_UNMAPPABLE 0xFFFF
-#define MAP_MULTIPLE_AVAIL 0xFFFE /* for JIS X 0213 */
-
-#define F_SHIFTED 0x01
-#define F_ESCTHROUGHOUT 0x02
-
-#define STATE_SETG(dn, v) ((state)->c[dn]) = (v);
-#define STATE_GETG(dn) ((state)->c[dn])
-
-#define STATE_G0 STATE_GETG(0)
-#define STATE_G1 STATE_GETG(1)
-#define STATE_G2 STATE_GETG(2)
-#define STATE_G3 STATE_GETG(3)
-#define STATE_SETG0(v) STATE_SETG(0, v)
-#define STATE_SETG1(v) STATE_SETG(1, v)
-#define STATE_SETG2(v) STATE_SETG(2, v)
-#define STATE_SETG3(v) STATE_SETG(3, v)
-
-#define STATE_SETFLAG(f) ((state)->c[4]) |= (f);
-#define STATE_GETFLAG(f) ((state)->c[4] & (f))
-#define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f);
-#define STATE_CLEARFLAGS() ((state)->c[4]) = 0;
-
-#define ISO2022_CONFIG ((const struct iso2022_config *)config)
-#define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag))
-#define CONFIG_DESIGNATIONS (ISO2022_CONFIG->designations)
+ ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
+ (c2) == '.' || (c2) == '&')
+ /* this is not a complete list of ISO-2022 escape sequence headers.
+ * but, it's enough to implement CJK instances of iso-2022. */
+
+#define MAP_UNMAPPABLE 0xFFFF
+#define MAP_MULTIPLE_AVAIL 0xFFFE /* for JIS X 0213 */
+
+#define F_SHIFTED 0x01
+#define F_ESCTHROUGHOUT 0x02
+
+#define STATE_SETG(dn, v) ((state)->c[dn]) = (v);
+#define STATE_GETG(dn) ((state)->c[dn])
+
+#define STATE_G0 STATE_GETG(0)
+#define STATE_G1 STATE_GETG(1)
+#define STATE_G2 STATE_GETG(2)
+#define STATE_G3 STATE_GETG(3)
+#define STATE_SETG0(v) STATE_SETG(0, v)
+#define STATE_SETG1(v) STATE_SETG(1, v)
+#define STATE_SETG2(v) STATE_SETG(2, v)
+#define STATE_SETG3(v) STATE_SETG(3, v)
+
+#define STATE_SETFLAG(f) ((state)->c[4]) |= (f);
+#define STATE_GETFLAG(f) ((state)->c[4] & (f))
+#define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f);
+#define STATE_CLEARFLAGS() ((state)->c[4]) = 0;
+
+#define ISO2022_CONFIG ((const struct iso2022_config *)config)
+#define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag))
+#define CONFIG_DESIGNATIONS (ISO2022_CONFIG->designations)
/* iso2022_config.flags */
-#define NO_SHIFT 0x01
-#define USE_G2 0x02
-#define USE_JISX0208_EXT 0x04
+#define NO_SHIFT 0x01
+#define USE_G2 0x02
+#define USE_JISX0208_EXT 0x04
/*-*- internal data structures -*-*/
@@ -106,434 +106,434 @@ typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);
struct iso2022_designation {
- unsigned char mark;
- unsigned char plane;
- unsigned char width;
- iso2022_init_func initializer;
- iso2022_decode_func decoder;
- iso2022_encode_func encoder;
+ unsigned char mark;
+ unsigned char plane;
+ unsigned char width;
+ iso2022_init_func initializer;
+ iso2022_decode_func decoder;
+ iso2022_encode_func encoder;
};
struct iso2022_config {
- int flags;
- const struct iso2022_designation *designations; /* non-ascii desigs */
+ int flags;
+ const struct iso2022_designation *designations; /* non-ascii desigs */
};
/*-*- iso-2022 codec implementation -*-*/
CODEC_INIT(iso2022)
{
- const struct iso2022_designation *desig = CONFIG_DESIGNATIONS;
- for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++)
- if (desig->initializer != NULL && desig->initializer() != 0)
- return -1;
- return 0;
+ const struct iso2022_designation *desig = CONFIG_DESIGNATIONS;
+ for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++)
+ if (desig->initializer != NULL && desig->initializer() != 0)
+ return -1;
+ return 0;
}
ENCODER_INIT(iso2022)
{
- STATE_CLEARFLAGS()
- STATE_SETG0(CHARSET_ASCII)
- STATE_SETG1(CHARSET_ASCII)
- return 0;
+ STATE_CLEARFLAGS()
+ STATE_SETG0(CHARSET_ASCII)
+ STATE_SETG1(CHARSET_ASCII)
+ return 0;
}
ENCODER_RESET(iso2022)
{
- if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- NEXT_OUT(1)
- STATE_CLEARFLAG(F_SHIFTED)
- }
- if (STATE_G0 != CHARSET_ASCII) {
- WRITE3(ESC, '(', 'B')
- NEXT_OUT(3)
- STATE_SETG0(CHARSET_ASCII)
- }
- return 0;
+ if (STATE_GETFLAG(F_SHIFTED)) {
+ WRITE1(SI)
+ NEXT_OUT(1)
+ STATE_CLEARFLAG(F_SHIFTED)
+ }
+ if (STATE_G0 != CHARSET_ASCII) {
+ WRITE3(ESC, '(', 'B')
+ NEXT_OUT(3)
+ STATE_SETG0(CHARSET_ASCII)
+ }
+ return 0;
}
ENCODER(iso2022)
{
- while (inleft > 0) {
- const struct iso2022_designation *dsg;
- DBCHAR encoded;
- ucs4_t c = **inbuf;
- Py_ssize_t insize;
-
- if (c < 0x80) {
- if (STATE_G0 != CHARSET_ASCII) {
- WRITE3(ESC, '(', 'B')
- STATE_SETG0(CHARSET_ASCII)
- NEXT_OUT(3)
- }
- if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- STATE_CLEARFLAG(F_SHIFTED)
- NEXT_OUT(1)
- }
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- continue;
- }
-
- DECODE_SURROGATE(c)
- insize = GET_INSIZE(c);
-
- encoded = MAP_UNMAPPABLE;
- for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
- Py_ssize_t length = 1;
- encoded = dsg->encoder(&c, &length);
- if (encoded == MAP_MULTIPLE_AVAIL) {
- /* this implementation won't work for pair
- * of non-bmp characters. */
- if (inleft < 2) {
- if (!(flags & MBENC_FLUSH))
- return MBERR_TOOFEW;
- length = -1;
- }
- else
- length = 2;
+ while (inleft > 0) {
+ const struct iso2022_designation *dsg;
+ DBCHAR encoded;
+ ucs4_t c = **inbuf;
+ Py_ssize_t insize;
+
+ if (c < 0x80) {
+ if (STATE_G0 != CHARSET_ASCII) {
+ WRITE3(ESC, '(', 'B')
+ STATE_SETG0(CHARSET_ASCII)
+ NEXT_OUT(3)
+ }
+ if (STATE_GETFLAG(F_SHIFTED)) {
+ WRITE1(SI)
+ STATE_CLEARFLAG(F_SHIFTED)
+ NEXT_OUT(1)
+ }
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ DECODE_SURROGATE(c)
+ insize = GET_INSIZE(c);
+
+ encoded = MAP_UNMAPPABLE;
+ for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
+ Py_ssize_t length = 1;
+ encoded = dsg->encoder(&c, &length);
+ if (encoded == MAP_MULTIPLE_AVAIL) {
+ /* this implementation won't work for pair
+ * of non-bmp characters. */
+ if (inleft < 2) {
+ if (!(flags & MBENC_FLUSH))
+ return MBERR_TOOFEW;
+ length = -1;
+ }
+ else
+ length = 2;
#if Py_UNICODE_SIZE == 2
- if (length == 2) {
- ucs4_t u4in[2];
- u4in[0] = (ucs4_t)IN1;
- u4in[1] = (ucs4_t)IN2;
- encoded = dsg->encoder(u4in, &length);
- } else
- encoded = dsg->encoder(&c, &length);
+ if (length == 2) {
+ ucs4_t u4in[2];
+ u4in[0] = (ucs4_t)IN1;
+ u4in[1] = (ucs4_t)IN2;
+ encoded = dsg->encoder(u4in, &length);
+ } else
+ encoded = dsg->encoder(&c, &length);
#else
- encoded = dsg->encoder(&c, &length);
+ encoded = dsg->encoder(&c, &length);
#endif
- if (encoded != MAP_UNMAPPABLE) {
- insize = length;
- break;
- }
- }
- else if (encoded != MAP_UNMAPPABLE)
- break;
- }
-
- if (!dsg->mark)
- return 1;
- assert(dsg->width == 1 || dsg->width == 2);
-
- switch (dsg->plane) {
- case 0: /* G0 */
- if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- STATE_CLEARFLAG(F_SHIFTED)
- NEXT_OUT(1)
- }
- if (STATE_G0 != dsg->mark) {
- if (dsg->width == 1) {
- WRITE3(ESC, '(', ESCMARK(dsg->mark))
- STATE_SETG0(dsg->mark)
- NEXT_OUT(3)
- }
- else if (dsg->mark == CHARSET_JISX0208) {
- WRITE3(ESC, '$', ESCMARK(dsg->mark))
- STATE_SETG0(dsg->mark)
- NEXT_OUT(3)
- }
- else {
- WRITE4(ESC, '$', '(',
- ESCMARK(dsg->mark))
- STATE_SETG0(dsg->mark)
- NEXT_OUT(4)
- }
- }
- break;
- case 1: /* G1 */
- if (STATE_G1 != dsg->mark) {
- if (dsg->width == 1) {
- WRITE3(ESC, ')', ESCMARK(dsg->mark))
- STATE_SETG1(dsg->mark)
- NEXT_OUT(3)
- }
- else {
- WRITE4(ESC, '$', ')',
- ESCMARK(dsg->mark))
- STATE_SETG1(dsg->mark)
- NEXT_OUT(4)
- }
- }
- if (!STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SO)
- STATE_SETFLAG(F_SHIFTED)
- NEXT_OUT(1)
- }
- break;
- default: /* G2 and G3 is not supported: no encoding in
- * CJKCodecs are using them yet */
- return MBERR_INTERNAL;
- }
-
- if (dsg->width == 1) {
- WRITE1((unsigned char)encoded)
- NEXT_OUT(1)
- }
- else {
- WRITE2(encoded >> 8, encoded & 0xff)
- NEXT_OUT(2)
- }
- NEXT_IN(insize)
- }
-
- return 0;
+ if (encoded != MAP_UNMAPPABLE) {
+ insize = length;
+ break;
+ }
+ }
+ else if (encoded != MAP_UNMAPPABLE)
+ break;
+ }
+
+ if (!dsg->mark)
+ return 1;
+ assert(dsg->width == 1 || dsg->width == 2);
+
+ switch (dsg->plane) {
+ case 0: /* G0 */
+ if (STATE_GETFLAG(F_SHIFTED)) {
+ WRITE1(SI)
+ STATE_CLEARFLAG(F_SHIFTED)
+ NEXT_OUT(1)
+ }
+ if (STATE_G0 != dsg->mark) {
+ if (dsg->width == 1) {
+ WRITE3(ESC, '(', ESCMARK(dsg->mark))
+ STATE_SETG0(dsg->mark)
+ NEXT_OUT(3)
+ }
+ else if (dsg->mark == CHARSET_JISX0208) {
+ WRITE3(ESC, '$', ESCMARK(dsg->mark))
+ STATE_SETG0(dsg->mark)
+ NEXT_OUT(3)
+ }
+ else {
+ WRITE4(ESC, '$', '(',
+ ESCMARK(dsg->mark))
+ STATE_SETG0(dsg->mark)
+ NEXT_OUT(4)
+ }
+ }
+ break;
+ case 1: /* G1 */
+ if (STATE_G1 != dsg->mark) {
+ if (dsg->width == 1) {
+ WRITE3(ESC, ')', ESCMARK(dsg->mark))
+ STATE_SETG1(dsg->mark)
+ NEXT_OUT(3)
+ }
+ else {
+ WRITE4(ESC, '$', ')',
+ ESCMARK(dsg->mark))
+ STATE_SETG1(dsg->mark)
+ NEXT_OUT(4)
+ }
+ }
+ if (!STATE_GETFLAG(F_SHIFTED)) {
+ WRITE1(SO)
+ STATE_SETFLAG(F_SHIFTED)
+ NEXT_OUT(1)
+ }
+ break;
+ default: /* G2 and G3 is not supported: no encoding in
+ * CJKCodecs are using them yet */
+ return MBERR_INTERNAL;
+ }
+
+ if (dsg->width == 1) {
+ WRITE1((unsigned char)encoded)
+ NEXT_OUT(1)
+ }
+ else {
+ WRITE2(encoded >> 8, encoded & 0xff)
+ NEXT_OUT(2)
+ }
+ NEXT_IN(insize)
+ }
+
+ return 0;
}
DECODER_INIT(iso2022)
{
- STATE_CLEARFLAGS()
- STATE_SETG0(CHARSET_ASCII)
- STATE_SETG1(CHARSET_ASCII)
- STATE_SETG2(CHARSET_ASCII)
- return 0;
+ STATE_CLEARFLAGS()
+ STATE_SETG0(CHARSET_ASCII)
+ STATE_SETG1(CHARSET_ASCII)
+ STATE_SETG2(CHARSET_ASCII)
+ return 0;
}
DECODER_RESET(iso2022)
{
- STATE_SETG0(CHARSET_ASCII)
- STATE_CLEARFLAG(F_SHIFTED)
- return 0;
+ STATE_SETG0(CHARSET_ASCII)
+ STATE_CLEARFLAG(F_SHIFTED)
+ return 0;
}
static Py_ssize_t
iso2022processesc(const void *config, MultibyteCodec_State *state,
- const unsigned char **inbuf, Py_ssize_t *inleft)
+ const unsigned char **inbuf, Py_ssize_t *inleft)
{
- unsigned char charset, designation;
- Py_ssize_t i, esclen;
-
- for (i = 1;i < MAX_ESCSEQLEN;i++) {
- if (i >= *inleft)
- return MBERR_TOOFEW;
- if (IS_ESCEND((*inbuf)[i])) {
- esclen = i + 1;
- break;
- }
- else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&
- (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')
- i += 2;
- }
-
- if (i >= MAX_ESCSEQLEN)
- return 1; /* unterminated escape sequence */
-
- switch (esclen) {
- case 3:
- if (IN2 == '$') {
- charset = IN3 | CHARSET_DBCS;
- designation = 0;
- }
- else {
- charset = IN3;
- if (IN2 == '(') designation = 0;
- else if (IN2 == ')') designation = 1;
- else if (CONFIG_ISSET(USE_G2) && IN2 == '.')
- designation = 2;
- else return 3;
- }
- break;
- case 4:
- if (IN2 != '$')
- return 4;
-
- charset = IN4 | CHARSET_DBCS;
- if (IN3 == '(') designation = 0;
- else if (IN3 == ')') designation = 1;
- else return 4;
- break;
- case 6: /* designation with prefix */
- if (CONFIG_ISSET(USE_JISX0208_EXT) &&
- (*inbuf)[3] == ESC && (*inbuf)[4] == '$' &&
- (*inbuf)[5] == 'B') {
- charset = 'B' | CHARSET_DBCS;
- designation = 0;
- }
- else
- return 6;
- break;
- default:
- return esclen;
- }
-
- /* raise error when the charset is not designated for this encoding */
- if (charset != CHARSET_ASCII) {
- const struct iso2022_designation *dsg;
-
- for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++)
- if (dsg->mark == charset)
- break;
- if (!dsg->mark)
- return esclen;
- }
-
- STATE_SETG(designation, charset)
- *inleft -= esclen;
- (*inbuf) += esclen;
- return 0;
+ unsigned char charset, designation;
+ Py_ssize_t i, esclen;
+
+ for (i = 1;i < MAX_ESCSEQLEN;i++) {
+ if (i >= *inleft)
+ return MBERR_TOOFEW;
+ if (IS_ESCEND((*inbuf)[i])) {
+ esclen = i + 1;
+ break;
+ }
+ else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&
+ (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')
+ i += 2;
+ }
+
+ if (i >= MAX_ESCSEQLEN)
+ return 1; /* unterminated escape sequence */
+
+ switch (esclen) {
+ case 3:
+ if (IN2 == '$') {
+ charset = IN3 | CHARSET_DBCS;
+ designation = 0;
+ }
+ else {
+ charset = IN3;
+ if (IN2 == '(') designation = 0;
+ else if (IN2 == ')') designation = 1;
+ else if (CONFIG_ISSET(USE_G2) && IN2 == '.')
+ designation = 2;
+ else return 3;
+ }
+ break;
+ case 4:
+ if (IN2 != '$')
+ return 4;
+
+ charset = IN4 | CHARSET_DBCS;
+ if (IN3 == '(') designation = 0;
+ else if (IN3 == ')') designation = 1;
+ else return 4;
+ break;
+ case 6: /* designation with prefix */
+ if (CONFIG_ISSET(USE_JISX0208_EXT) &&
+ (*inbuf)[3] == ESC && (*inbuf)[4] == '$' &&
+ (*inbuf)[5] == 'B') {
+ charset = 'B' | CHARSET_DBCS;
+ designation = 0;
+ }
+ else
+ return 6;
+ break;
+ default:
+ return esclen;
+ }
+
+ /* raise error when the charset is not designated for this encoding */
+ if (charset != CHARSET_ASCII) {
+ const struct iso2022_designation *dsg;
+
+ for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++)
+ if (dsg->mark == charset)
+ break;
+ if (!dsg->mark)
+ return esclen;
+ }
+
+ STATE_SETG(designation, charset)
+ *inleft -= esclen;
+ (*inbuf) += esclen;
+ return 0;
}
-#define ISO8859_7_DECODE(c, assi) \
- if ((c) < 0xa0) (assi) = (c); \
- else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
- (assi) = (c); \
- else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
- (0xbffffd77L & (1L << ((c)-0xb4))))) \
- (assi) = 0x02d0 + (c); \
- else if ((c) == 0xa1) (assi) = 0x2018; \
- else if ((c) == 0xa2) (assi) = 0x2019; \
- else if ((c) == 0xaf) (assi) = 0x2015;
+#define ISO8859_7_DECODE(c, assi) \
+ if ((c) < 0xa0) (assi) = (c); \
+ else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
+ (assi) = (c); \
+ else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
+ (0xbffffd77L & (1L << ((c)-0xb4))))) \
+ (assi) = 0x02d0 + (c); \
+ else if ((c) == 0xa1) (assi) = 0x2018; \
+ else if ((c) == 0xa2) (assi) = 0x2019; \
+ else if ((c) == 0xaf) (assi) = 0x2015;
static Py_ssize_t
iso2022processg2(const void *config, MultibyteCodec_State *state,
- const unsigned char **inbuf, Py_ssize_t *inleft,
- Py_UNICODE **outbuf, Py_ssize_t *outleft)
+ const unsigned char **inbuf, Py_ssize_t *inleft,
+ Py_UNICODE **outbuf, Py_ssize_t *outleft)
{
- /* not written to use encoder, decoder functions because only few
- * encodings use G2 designations in CJKCodecs */
- if (STATE_G2 == CHARSET_ISO8859_1) {
- if (IN3 < 0x80)
- OUT1(IN3 + 0x80)
- else
- return 3;
- }
- else if (STATE_G2 == CHARSET_ISO8859_7) {
- ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
- else return 3;
- }
- else if (STATE_G2 == CHARSET_ASCII) {
- if (IN3 & 0x80) return 3;
- else **outbuf = IN3;
- }
- else
- return MBERR_INTERNAL;
-
- (*inbuf) += 3;
- *inleft -= 3;
- (*outbuf) += 1;
- *outleft -= 1;
- return 0;
+ /* not written to use encoder, decoder functions because only few
+ * encodings use G2 designations in CJKCodecs */
+ if (STATE_G2 == CHARSET_ISO8859_1) {
+ if (IN3 < 0x80)
+ OUT1(IN3 + 0x80)
+ else
+ return 3;
+ }
+ else if (STATE_G2 == CHARSET_ISO8859_7) {
+ ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
+ else return 3;
+ }
+ else if (STATE_G2 == CHARSET_ASCII) {
+ if (IN3 & 0x80) return 3;
+ else **outbuf = IN3;
+ }
+ else
+ return MBERR_INTERNAL;
+
+ (*inbuf) += 3;
+ *inleft -= 3;
+ (*outbuf) += 1;
+ *outleft -= 1;
+ return 0;
}
DECODER(iso2022)
{
- const struct iso2022_designation *dsgcache = NULL;
-
- while (inleft > 0) {
- unsigned char c = IN1;
- Py_ssize_t err;
-
- if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
- /* ESC throughout mode:
- * for non-iso2022 escape sequences */
- WRITE1(c) /* assume as ISO-8859-1 */
- NEXT(1, 1)
- if (IS_ESCEND(c)) {
- STATE_CLEARFLAG(F_ESCTHROUGHOUT)
- }
- continue;
- }
-
- switch (c) {
- case ESC:
- REQUIRE_INBUF(2)
- if (IS_ISO2022ESC(IN2)) {
- err = iso2022processesc(config, state,
- inbuf, &inleft);
- if (err != 0)
- return err;
- }
- else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
- REQUIRE_INBUF(3)
- err = iso2022processg2(config, state,
- inbuf, &inleft, outbuf, &outleft);
- if (err != 0)
- return err;
- }
- else {
- WRITE1(ESC)
- STATE_SETFLAG(F_ESCTHROUGHOUT)
- NEXT(1, 1)
- }
- break;
- case SI:
- if (CONFIG_ISSET(NO_SHIFT))
- goto bypass;
- STATE_CLEARFLAG(F_SHIFTED)
- NEXT_IN(1)
- break;
- case SO:
- if (CONFIG_ISSET(NO_SHIFT))
- goto bypass;
- STATE_SETFLAG(F_SHIFTED)
- NEXT_IN(1)
- break;
- case LF:
- STATE_CLEARFLAG(F_SHIFTED)
- WRITE1(LF)
- NEXT(1, 1)
- break;
- default:
- if (c < 0x20) /* C0 */
- goto bypass;
- else if (c >= 0x80)
- return 1;
- else {
- const struct iso2022_designation *dsg;
- unsigned char charset;
- ucs4_t decoded;
-
- if (STATE_GETFLAG(F_SHIFTED))
- charset = STATE_G1;
- else
- charset = STATE_G0;
-
- if (charset == CHARSET_ASCII) {
-bypass: WRITE1(c)
- NEXT(1, 1)
- break;
- }
-
- if (dsgcache != NULL &&
- dsgcache->mark == charset)
- dsg = dsgcache;
- else {
- for (dsg = CONFIG_DESIGNATIONS;
- dsg->mark != charset
+ const struct iso2022_designation *dsgcache = NULL;
+
+ while (inleft > 0) {
+ unsigned char c = IN1;
+ Py_ssize_t err;
+
+ if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
+ /* ESC throughout mode:
+ * for non-iso2022 escape sequences */
+ WRITE1(c) /* assume as ISO-8859-1 */
+ NEXT(1, 1)
+ if (IS_ESCEND(c)) {
+ STATE_CLEARFLAG(F_ESCTHROUGHOUT)
+ }
+ continue;
+ }
+
+ switch (c) {
+ case ESC:
+ REQUIRE_INBUF(2)
+ if (IS_ISO2022ESC(IN2)) {
+ err = iso2022processesc(config, state,
+ inbuf, &inleft);
+ if (err != 0)
+ return err;
+ }
+ else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
+ REQUIRE_INBUF(3)
+ err = iso2022processg2(config, state,
+ inbuf, &inleft, outbuf, &outleft);
+ if (err != 0)
+ return err;
+ }
+ else {
+ WRITE1(ESC)
+ STATE_SETFLAG(F_ESCTHROUGHOUT)
+ NEXT(1, 1)
+ }
+ break;
+ case SI:
+ if (CONFIG_ISSET(NO_SHIFT))
+ goto bypass;
+ STATE_CLEARFLAG(F_SHIFTED)
+ NEXT_IN(1)
+ break;
+ case SO:
+ if (CONFIG_ISSET(NO_SHIFT))
+ goto bypass;
+ STATE_SETFLAG(F_SHIFTED)
+ NEXT_IN(1)
+ break;
+ case LF:
+ STATE_CLEARFLAG(F_SHIFTED)
+ WRITE1(LF)
+ NEXT(1, 1)
+ break;
+ default:
+ if (c < 0x20) /* C0 */
+ goto bypass;
+ else if (c >= 0x80)
+ return 1;
+ else {
+ const struct iso2022_designation *dsg;
+ unsigned char charset;
+ ucs4_t decoded;
+
+ if (STATE_GETFLAG(F_SHIFTED))
+ charset = STATE_G1;
+ else
+ charset = STATE_G0;
+
+ if (charset == CHARSET_ASCII) {
+bypass: WRITE1(c)
+ NEXT(1, 1)
+ break;
+ }
+
+ if (dsgcache != NULL &&
+ dsgcache->mark == charset)
+ dsg = dsgcache;
+ else {
+ for (dsg = CONFIG_DESIGNATIONS;
+ dsg->mark != charset
#ifdef Py_DEBUG
- && dsg->mark != '\0'
+ && dsg->mark != '\0'
#endif
- ;dsg++)
- /* noop */;
- assert(dsg->mark != '\0');
- dsgcache = dsg;
- }
-
- REQUIRE_INBUF(dsg->width)
- decoded = dsg->decoder(*inbuf);
- if (decoded == MAP_UNMAPPABLE)
- return dsg->width;
-
- if (decoded < 0x10000) {
- WRITE1(decoded)
- NEXT_OUT(1)
- }
- else if (decoded < 0x30000) {
- WRITEUCS4(decoded)
- }
- else { /* JIS X 0213 pairs */
- WRITE2(decoded >> 16, decoded & 0xffff)
- NEXT_OUT(2)
- }
- NEXT_IN(dsg->width)
- }
- break;
- }
- }
- return 0;
+ ;dsg++)
+ /* noop */;
+ assert(dsg->mark != '\0');
+ dsgcache = dsg;
+ }
+
+ REQUIRE_INBUF(dsg->width)
+ decoded = dsg->decoder(*inbuf);
+ if (decoded == MAP_UNMAPPABLE)
+ return dsg->width;
+
+ if (decoded < 0x10000) {
+ WRITE1(decoded)
+ NEXT_OUT(1)
+ }
+ else if (decoded < 0x30000) {
+ WRITEUCS4(decoded)
+ }
+ else { /* JIS X 0213 pairs */
+ WRITE2(decoded >> 16, decoded & 0xffff)
+ NEXT_OUT(2)
+ }
+ NEXT_IN(dsg->width)
+ }
+ break;
+ }
+ }
+ return 0;
}
/*-*- mapping table holders -*-*/
@@ -567,542 +567,542 @@ DECMAP(gb2312)
static int
ksx1001_init(void)
{
- static int initialized = 0;
-
- if (!initialized && (
- IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) ||
- IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap)))
- return -1;
- initialized = 1;
- return 0;
+ static int initialized = 0;
+
+ if (!initialized && (
+ IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) ||
+ IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap)))
+ return -1;
+ initialized = 1;
+ return 0;
}
static ucs4_t
ksx1001_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(ksx1001, u, data[0], data[1])
- return u;
- else
- return MAP_UNMAPPABLE;
+ ucs4_t u;
+ TRYMAP_DEC(ksx1001, u, data[0], data[1])
+ return u;
+ else
+ return MAP_UNMAPPABLE;
}
static DBCHAR
ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- assert(*length == 1);
- if (*data < 0x10000) {
- TRYMAP_ENC(cp949, coded, *data)
- if (!(coded & 0x8000))
- return coded;
- }
- return MAP_UNMAPPABLE;
+ DBCHAR coded;
+ assert(*length == 1);
+ if (*data < 0x10000) {
+ TRYMAP_ENC(cp949, coded, *data)
+ if (!(coded & 0x8000))
+ return coded;
+ }
+ return MAP_UNMAPPABLE;
}
static int
jisx0208_init(void)
{
- static int initialized = 0;
-
- if (!initialized && (
- IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||
- IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap)))
- return -1;
- initialized = 1;
- return 0;
+ static int initialized = 0;
+
+ if (!initialized && (
+ IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||
+ IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap)))
+ return -1;
+ initialized = 1;
+ return 0;
}
static ucs4_t
jisx0208_decoder(const unsigned char *data)
{
- ucs4_t u;
- if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
- return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1])
- return u;
- else
- return MAP_UNMAPPABLE;
+ ucs4_t u;
+ if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
+ return 0xff3c;
+ else TRYMAP_DEC(jisx0208, u, data[0], data[1])
+ return u;
+ else
+ return MAP_UNMAPPABLE;
}
static DBCHAR
jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- assert(*length == 1);
- if (*data < 0x10000) {
- if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
- return 0x2140;
- else TRYMAP_ENC(jisxcommon, coded, *data) {
- if (!(coded & 0x8000))
- return coded;
- }
- }
- return MAP_UNMAPPABLE;
+ DBCHAR coded;
+ assert(*length == 1);
+ if (*data < 0x10000) {
+ if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
+ return 0x2140;
+ else TRYMAP_ENC(jisxcommon, coded, *data) {
+ if (!(coded & 0x8000))
+ return coded;
+ }
+ }
+ return MAP_UNMAPPABLE;
}
static int
jisx0212_init(void)
{
- static int initialized = 0;
-
- if (!initialized && (
- IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||
- IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap)))
- return -1;
- initialized = 1;
- return 0;
+ static int initialized = 0;
+
+ if (!initialized && (
+ IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||
+ IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap)))
+ return -1;
+ initialized = 1;
+ return 0;
}
static ucs4_t
jisx0212_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(jisx0212, u, data[0], data[1])
- return u;
- else
- return MAP_UNMAPPABLE;
+ ucs4_t u;
+ TRYMAP_DEC(jisx0212, u, data[0], data[1])
+ return u;
+ else
+ return MAP_UNMAPPABLE;
}
static DBCHAR
jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- assert(*length == 1);
- if (*data < 0x10000) {
- TRYMAP_ENC(jisxcommon, coded, *data) {
- if (coded & 0x8000)
- return coded & 0x7fff;
- }
- }
- return MAP_UNMAPPABLE;
+ DBCHAR coded;
+ assert(*length == 1);
+ if (*data < 0x10000) {
+ TRYMAP_ENC(jisxcommon, coded, *data) {
+ if (coded & 0x8000)
+ return coded & 0x7fff;
+ }
+ }
+ return MAP_UNMAPPABLE;
}
static int
jisx0213_init(void)
{
- static int initialized = 0;
-
- if (!initialized && (
- jisx0208_init() ||
- IMPORT_MAP(jp, jisx0213_bmp,
- &jisx0213_bmp_encmap, NULL) ||
- IMPORT_MAP(jp, jisx0213_1_bmp,
- NULL, &jisx0213_1_bmp_decmap) ||
- IMPORT_MAP(jp, jisx0213_2_bmp,
- NULL, &jisx0213_2_bmp_decmap) ||
- IMPORT_MAP(jp, jisx0213_emp,
- &jisx0213_emp_encmap, NULL) ||
- IMPORT_MAP(jp, jisx0213_1_emp,
- NULL, &jisx0213_1_emp_decmap) ||
- IMPORT_MAP(jp, jisx0213_2_emp,
- NULL, &jisx0213_2_emp_decmap) ||
- IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap,
- &jisx0213_pair_decmap)))
- return -1;
- initialized = 1;
- return 0;
+ static int initialized = 0;
+
+ if (!initialized && (
+ jisx0208_init() ||
+ IMPORT_MAP(jp, jisx0213_bmp,
+ &jisx0213_bmp_encmap, NULL) ||
+ IMPORT_MAP(jp, jisx0213_1_bmp,
+ NULL, &jisx0213_1_bmp_decmap) ||
+ IMPORT_MAP(jp, jisx0213_2_bmp,
+ NULL, &jisx0213_2_bmp_decmap) ||
+ IMPORT_MAP(jp, jisx0213_emp,
+ &jisx0213_emp_encmap, NULL) ||
+ IMPORT_MAP(jp, jisx0213_1_emp,
+ NULL, &jisx0213_1_emp_decmap) ||
+ IMPORT_MAP(jp, jisx0213_2_emp,
+ NULL, &jisx0213_2_emp_decmap) ||
+ IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap,
+ &jisx0213_pair_decmap)))
+ return -1;
+ initialized = 1;
+ return 0;
}
#define config ((void *)2000)
static ucs4_t
jisx0213_2000_1_decoder(const unsigned char *data)
{
- ucs4_t u;
- EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
- else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
- return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
- u |= 0x20000;
- else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
- else
- return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
+ else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
+ return 0xff3c;
+ else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+ u |= 0x20000;
+ else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+ else
+ return MAP_UNMAPPABLE;
+ return u;
}
static ucs4_t
jisx0213_2000_2_decoder(const unsigned char *data)
{
- ucs4_t u;
- EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
- u |= 0x20000;
- else
- return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
+ TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+ u |= 0x20000;
+ else
+ return MAP_UNMAPPABLE;
+ return u;
}
#undef config
static ucs4_t
jisx0213_2004_1_decoder(const unsigned char *data)
{
- ucs4_t u;
- if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
- return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
- u |= 0x20000;
- else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
- else
- return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
+ return 0xff3c;
+ else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+ u |= 0x20000;
+ else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+ else
+ return MAP_UNMAPPABLE;
+ return u;
}
static ucs4_t
jisx0213_2004_2_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
- u |= 0x20000;
- else
- return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+ u |= 0x20000;
+ else
+ return MAP_UNMAPPABLE;
+ return u;
}
static DBCHAR
jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
{
- DBCHAR coded;
-
- switch (*length) {
- case 1: /* first character */
- if (*data >= 0x10000) {
- if ((*data) >> 16 == 0x20000 >> 16) {
- EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data)
- else TRYMAP_ENC(jisx0213_emp, coded,
- (*data) & 0xffff)
- return coded;
- }
- return MAP_UNMAPPABLE;
- }
-
- EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data)
- else TRYMAP_ENC(jisx0213_bmp, coded, *data) {
- if (coded == MULTIC)
- return MAP_MULTIPLE_AVAIL;
- }
- else TRYMAP_ENC(jisxcommon, coded, *data) {
- if (coded & 0x8000)
- return MAP_UNMAPPABLE;
- }
- else
- return MAP_UNMAPPABLE;
- return coded;
- case 2: /* second character of unicode pair */
- coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
- jisx0213_pair_encmap, JISX0213_ENCPAIRS);
- if (coded == DBCINV) {
- *length = 1;
- coded = find_pairencmap((ucs2_t)data[0], 0,
- jisx0213_pair_encmap, JISX0213_ENCPAIRS);
- if (coded == DBCINV)
- return MAP_UNMAPPABLE;
- }
- else
- return coded;
- case -1: /* flush unterminated */
- *length = 1;
- coded = find_pairencmap((ucs2_t)data[0], 0,
- jisx0213_pair_encmap, JISX0213_ENCPAIRS);
- if (coded == DBCINV)
- return MAP_UNMAPPABLE;
- else
- return coded;
- default:
- return MAP_UNMAPPABLE;
- }
+ DBCHAR coded;
+
+ switch (*length) {
+ case 1: /* first character */
+ if (*data >= 0x10000) {
+ if ((*data) >> 16 == 0x20000 >> 16) {
+ EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data)
+ else TRYMAP_ENC(jisx0213_emp, coded,
+ (*data) & 0xffff)
+ return coded;
+ }
+ return MAP_UNMAPPABLE;
+ }
+
+ EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data)
+ else TRYMAP_ENC(jisx0213_bmp, coded, *data) {
+ if (coded == MULTIC)
+ return MAP_MULTIPLE_AVAIL;
+ }
+ else TRYMAP_ENC(jisxcommon, coded, *data) {
+ if (coded & 0x8000)
+ return MAP_UNMAPPABLE;
+ }
+ else
+ return MAP_UNMAPPABLE;
+ return coded;
+ case 2: /* second character of unicode pair */
+ coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
+ jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+ if (coded == DBCINV) {
+ *length = 1;
+ coded = find_pairencmap((ucs2_t)data[0], 0,
+ jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+ if (coded == DBCINV)
+ return MAP_UNMAPPABLE;
+ }
+ else
+ return coded;
+ case -1: /* flush unterminated */
+ *length = 1;
+ coded = find_pairencmap((ucs2_t)data[0], 0,
+ jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+ if (coded == DBCINV)
+ return MAP_UNMAPPABLE;
+ else
+ return coded;
+ default:
+ return MAP_UNMAPPABLE;
+ }
}
static DBCHAR
jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
- if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
- return coded;
- else if (coded & 0x8000)
- return MAP_UNMAPPABLE;
- else
- return coded;
+ DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
+ if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+ return coded;
+ else if (coded & 0x8000)
+ return MAP_UNMAPPABLE;
+ else
+ return coded;
}
static DBCHAR
jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- Py_ssize_t ilength = *length;
-
- coded = jisx0213_encoder(data, length, (void *)2000);
- switch (ilength) {
- case 1:
- if (coded == MAP_MULTIPLE_AVAIL)
- return MAP_MULTIPLE_AVAIL;
- else
- return MAP_UNMAPPABLE;
- case 2:
- if (*length != 2)
- return MAP_UNMAPPABLE;
- else
- return coded;
- default:
- return MAP_UNMAPPABLE;
- }
+ DBCHAR coded;
+ Py_ssize_t ilength = *length;
+
+ coded = jisx0213_encoder(data, length, (void *)2000);
+ switch (ilength) {
+ case 1:
+ if (coded == MAP_MULTIPLE_AVAIL)
+ return MAP_MULTIPLE_AVAIL;
+ else
+ return MAP_UNMAPPABLE;
+ case 2:
+ if (*length != 2)
+ return MAP_UNMAPPABLE;
+ else
+ return coded;
+ default:
+ return MAP_UNMAPPABLE;
+ }
}
static DBCHAR
jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
- if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
- return coded;
- else if (coded & 0x8000)
- return coded & 0x7fff;
- else
- return MAP_UNMAPPABLE;
+ DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
+ if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+ return coded;
+ else if (coded & 0x8000)
+ return coded & 0x7fff;
+ else
+ return MAP_UNMAPPABLE;
}
static DBCHAR
jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded = jisx0213_encoder(data, length, NULL);
- if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
- return coded;
- else if (coded & 0x8000)
- return MAP_UNMAPPABLE;
- else
- return coded;
+ DBCHAR coded = jisx0213_encoder(data, length, NULL);
+ if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+ return coded;
+ else if (coded & 0x8000)
+ return MAP_UNMAPPABLE;
+ else
+ return coded;
}
static DBCHAR
jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- Py_ssize_t ilength = *length;
-
- coded = jisx0213_encoder(data, length, NULL);
- switch (ilength) {
- case 1:
- if (coded == MAP_MULTIPLE_AVAIL)
- return MAP_MULTIPLE_AVAIL;
- else
- return MAP_UNMAPPABLE;
- case 2:
- if (*length != 2)
- return MAP_UNMAPPABLE;
- else
- return coded;
- default:
- return MAP_UNMAPPABLE;
- }
+ DBCHAR coded;
+ Py_ssize_t ilength = *length;
+
+ coded = jisx0213_encoder(data, length, NULL);
+ switch (ilength) {
+ case 1:
+ if (coded == MAP_MULTIPLE_AVAIL)
+ return MAP_MULTIPLE_AVAIL;
+ else
+ return MAP_UNMAPPABLE;
+ case 2:
+ if (*length != 2)
+ return MAP_UNMAPPABLE;
+ else
+ return coded;
+ default:
+ return MAP_UNMAPPABLE;
+ }
}
static DBCHAR
jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded = jisx0213_encoder(data, length, NULL);
- if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
- return coded;
- else if (coded & 0x8000)
- return coded & 0x7fff;
- else
- return MAP_UNMAPPABLE;
+ DBCHAR coded = jisx0213_encoder(data, length, NULL);
+ if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+ return coded;
+ else if (coded & 0x8000)
+ return coded & 0x7fff;
+ else
+ return MAP_UNMAPPABLE;
}
static ucs4_t
jisx0201_r_decoder(const unsigned char *data)
{
- ucs4_t u;
- JISX0201_R_DECODE(*data, u)
- else return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ JISX0201_R_DECODE(*data, u)
+ else return MAP_UNMAPPABLE;
+ return u;
}
static DBCHAR
jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- JISX0201_R_ENCODE(*data, coded)
- else return MAP_UNMAPPABLE;
- return coded;
+ DBCHAR coded;
+ JISX0201_R_ENCODE(*data, coded)
+ else return MAP_UNMAPPABLE;
+ return coded;
}
static ucs4_t
jisx0201_k_decoder(const unsigned char *data)
{
- ucs4_t u;
- JISX0201_K_DECODE(*data ^ 0x80, u)
- else return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ JISX0201_K_DECODE(*data ^ 0x80, u)
+ else return MAP_UNMAPPABLE;
+ return u;
}
static DBCHAR
jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- JISX0201_K_ENCODE(*data, coded)
- else return MAP_UNMAPPABLE;
- return coded - 0x80;
+ DBCHAR coded;
+ JISX0201_K_ENCODE(*data, coded)
+ else return MAP_UNMAPPABLE;
+ return coded - 0x80;
}
static int
gb2312_init(void)
{
- static int initialized = 0;
-
- if (!initialized && (
- IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) ||
- IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap)))
- return -1;
- initialized = 1;
- return 0;
+ static int initialized = 0;
+
+ if (!initialized && (
+ IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) ||
+ IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap)))
+ return -1;
+ initialized = 1;
+ return 0;
}
static ucs4_t
gb2312_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(gb2312, u, data[0], data[1])
- return u;
- else
- return MAP_UNMAPPABLE;
+ ucs4_t u;
+ TRYMAP_DEC(gb2312, u, data[0], data[1])
+ return u;
+ else
+ return MAP_UNMAPPABLE;
}
static DBCHAR
gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- assert(*length == 1);
- if (*data < 0x10000) {
- TRYMAP_ENC(gbcommon, coded, *data) {
- if (!(coded & 0x8000))
- return coded;
- }
- }
- return MAP_UNMAPPABLE;
+ DBCHAR coded;
+ assert(*length == 1);
+ if (*data < 0x10000) {
+ TRYMAP_ENC(gbcommon, coded, *data) {
+ if (!(coded & 0x8000))
+ return coded;
+ }
+ }
+ return MAP_UNMAPPABLE;
}
static ucs4_t
dummy_decoder(const unsigned char *data)
{
- return MAP_UNMAPPABLE;
+ return MAP_UNMAPPABLE;
}
static DBCHAR
dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- return MAP_UNMAPPABLE;
+ return MAP_UNMAPPABLE;
}
/*-*- registry tables -*-*/
-#define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2, \
- ksx1001_init, \
- ksx1001_decoder, ksx1001_encoder }
-#define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2, \
- ksx1001_init, \
- ksx1001_decoder, ksx1001_encoder }
-#define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1, \
- NULL, \
- jisx0201_r_decoder, jisx0201_r_encoder }
-#define REGISTRY_JISX0201_K { CHARSET_JISX0201_K, 0, 1, \
- NULL, \
- jisx0201_k_decoder, jisx0201_k_encoder }
-#define REGISTRY_JISX0208 { CHARSET_JISX0208, 0, 2, \
- jisx0208_init, \
- jisx0208_decoder, jisx0208_encoder }
-#define REGISTRY_JISX0208_O { CHARSET_JISX0208_O, 0, 2, \
- jisx0208_init, \
- jisx0208_decoder, jisx0208_encoder }
-#define REGISTRY_JISX0212 { CHARSET_JISX0212, 0, 2, \
- jisx0212_init, \
- jisx0212_decoder, jisx0212_encoder }
-#define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2, \
- jisx0213_init, \
- jisx0213_2000_1_decoder, \
- jisx0213_2000_1_encoder }
+#define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2, \
+ ksx1001_init, \
+ ksx1001_decoder, ksx1001_encoder }
+#define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2, \
+ ksx1001_init, \
+ ksx1001_decoder, ksx1001_encoder }
+#define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1, \
+ NULL, \
+ jisx0201_r_decoder, jisx0201_r_encoder }
+#define REGISTRY_JISX0201_K { CHARSET_JISX0201_K, 0, 1, \
+ NULL, \
+ jisx0201_k_decoder, jisx0201_k_encoder }
+#define REGISTRY_JISX0208 { CHARSET_JISX0208, 0, 2, \
+ jisx0208_init, \
+ jisx0208_decoder, jisx0208_encoder }
+#define REGISTRY_JISX0208_O { CHARSET_JISX0208_O, 0, 2, \
+ jisx0208_init, \
+ jisx0208_decoder, jisx0208_encoder }
+#define REGISTRY_JISX0212 { CHARSET_JISX0212, 0, 2, \
+ jisx0212_init, \
+ jisx0212_decoder, jisx0212_encoder }
+#define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2, \
+ jisx0213_init, \
+ jisx0213_2000_1_decoder, \
+ jisx0213_2000_1_encoder }
#define REGISTRY_JISX0213_2000_1_PAIRONLY { CHARSET_JISX0213_2000_1, 0, 2, \
- jisx0213_init, \
- jisx0213_2000_1_decoder, \
- jisx0213_2000_1_encoder_paironly }
-#define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2, \
- jisx0213_init, \
- jisx0213_2000_2_decoder, \
- jisx0213_2000_2_encoder }
-#define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2, \
- jisx0213_init, \
- jisx0213_2004_1_decoder, \
- jisx0213_2004_1_encoder }
+ jisx0213_init, \
+ jisx0213_2000_1_decoder, \
+ jisx0213_2000_1_encoder_paironly }
+#define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2, \
+ jisx0213_init, \
+ jisx0213_2000_2_decoder, \
+ jisx0213_2000_2_encoder }
+#define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2, \
+ jisx0213_init, \
+ jisx0213_2004_1_decoder, \
+ jisx0213_2004_1_encoder }
#define REGISTRY_JISX0213_2004_1_PAIRONLY { CHARSET_JISX0213_2004_1, 0, 2, \
- jisx0213_init, \
- jisx0213_2004_1_decoder, \
- jisx0213_2004_1_encoder_paironly }
-#define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2, \
- jisx0213_init, \
- jisx0213_2004_2_decoder, \
- jisx0213_2004_2_encoder }
-#define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2, \
- gb2312_init, \
- gb2312_decoder, gb2312_encoder }
-#define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2, \
- cns11643_init, \
- cns11643_1_decoder, cns11643_1_encoder }
-#define REGISTRY_CNS11643_2 { CHARSET_CNS11643_2, 2, 2, \
- cns11643_init, \
- cns11643_2_decoder, cns11643_2_encoder }
-#define REGISTRY_ISO8859_1 { CHARSET_ISO8859_1, 2, 1, \
- NULL, dummy_decoder, dummy_encoder }
-#define REGISTRY_ISO8859_7 { CHARSET_ISO8859_7, 2, 1, \
- NULL, dummy_decoder, dummy_encoder }
-#define REGISTRY_SENTINEL { 0, }
-#define CONFIGDEF(var, attrs) \
- static const struct iso2022_config iso2022_##var##_config = { \
- attrs, iso2022_##var##_designations \
- };
+ jisx0213_init, \
+ jisx0213_2004_1_decoder, \
+ jisx0213_2004_1_encoder_paironly }
+#define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2, \
+ jisx0213_init, \
+ jisx0213_2004_2_decoder, \
+ jisx0213_2004_2_encoder }
+#define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2, \
+ gb2312_init, \
+ gb2312_decoder, gb2312_encoder }
+#define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2, \
+ cns11643_init, \
+ cns11643_1_decoder, cns11643_1_encoder }
+#define REGISTRY_CNS11643_2 { CHARSET_CNS11643_2, 2, 2, \
+ cns11643_init, \
+ cns11643_2_decoder, cns11643_2_encoder }
+#define REGISTRY_ISO8859_1 { CHARSET_ISO8859_1, 2, 1, \
+ NULL, dummy_decoder, dummy_encoder }
+#define REGISTRY_ISO8859_7 { CHARSET_ISO8859_7, 2, 1, \
+ NULL, dummy_decoder, dummy_encoder }
+#define REGISTRY_SENTINEL { 0, }
+#define CONFIGDEF(var, attrs) \
+ static const struct iso2022_config iso2022_##var##_config = { \
+ attrs, iso2022_##var##_designations \
+ };
static const struct iso2022_designation iso2022_kr_designations[] = {
- REGISTRY_KSX1001_G1, REGISTRY_SENTINEL
+ REGISTRY_KSX1001_G1, REGISTRY_SENTINEL
};
CONFIGDEF(kr, 0)
static const struct iso2022_designation iso2022_jp_designations[] = {
- REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
- REGISTRY_SENTINEL
+ REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
+ REGISTRY_SENTINEL
};
CONFIGDEF(jp, NO_SHIFT | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_1_designations[] = {
- REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
- REGISTRY_JISX0208_O, REGISTRY_SENTINEL
+ REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
+ REGISTRY_JISX0208_O, REGISTRY_SENTINEL
};
CONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_2_designations[] = {
- REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0,
- REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
- REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL
+ REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0,
+ REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
+ REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL
};
CONFIGDEF(jp_2, NO_SHIFT | USE_G2 | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_2004_designations[] = {
- REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208,
- REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL
+ REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208,
+ REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL
};
CONFIGDEF(jp_2004, NO_SHIFT | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_3_designations[] = {
- REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208,
- REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL
+ REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208,
+ REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL
};
CONFIGDEF(jp_3, NO_SHIFT | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_ext_designations[] = {
- REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
- REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL
+ REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
+ REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL
};
CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)
@@ -1111,11 +1111,11 @@ BEGIN_MAPPINGS_LIST
/* no mapping table here */
END_MAPPINGS_LIST
-#define ISO2022_CODEC(variation) { \
- "iso2022_" #variation, \
- &iso2022_##variation##_config, \
- iso2022_codec_init, \
- _STATEFUL_METHODS(iso2022) \
+#define ISO2022_CODEC(variation) { \
+ "iso2022_" #variation, \
+ &iso2022_##variation##_config, \
+ iso2022_codec_init, \
+ _STATEFUL_METHODS(iso2022) \
},
BEGIN_CODECS_LIST